diff options
Diffstat (limited to 'ext')
52 files changed, 9910 insertions, 0 deletions
diff --git a/ext/.gitignore b/ext/.gitignore index d53dccedd8..aaeccced05 100644 --- a/ext/.gitignore +++ b/ext/.gitignore @@ -75,6 +75,7 @@ ppport.h /PerlIO-via/Makefile.PL /Pod-Escapes/Makefile.PL /Pod-LaTeX/Makefile.PL +/Pod-Parser/Makefile.PL /Pod-Perldoc/Makefile.PL /Pod-Plainer/Makefile.PL /Pod-Simple/Makefile.PL diff --git a/ext/Pod-Parser/lib/Pod/Checker.pm b/ext/Pod-Parser/lib/Pod/Checker.pm new file mode 100644 index 0000000000..a2305423b2 --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/Checker.pm @@ -0,0 +1,1268 @@ +############################################################################# +# Pod/Checker.pm -- check pod documents for syntax errors +# +# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Checker; +use strict; + +use vars qw($VERSION @ISA @EXPORT %VALID_COMMANDS %VALID_SEQUENCES); +$VERSION = '1.45'; ## Current version of this package +require 5.005; ## requires this Perl version or later + +use Pod::ParseUtils; ## for hyperlinks and lists + +=head1 NAME + +Pod::Checker, podchecker() - check pod documents for syntax errors + +=head1 SYNOPSIS + + use Pod::Checker; + + $syntax_okay = podchecker($filepath, $outputpath, %options); + + my $checker = new Pod::Checker %options; + $checker->parse_from_file($filepath, \*STDERR); + +=head1 OPTIONS/ARGUMENTS + +C<$filepath> is the input POD to read and C<$outputpath> is +where to write POD syntax error messages. Either argument may be a scalar +indicating a file-path, or else a reference to an open filehandle. +If unspecified, the input-file it defaults to C<\*STDIN>, and +the output-file defaults to C<\*STDERR>. + +=head2 podchecker() + +This function can take a hash of options: + +=over 4 + +=item B<-warnings> =E<gt> I<val> + +Turn warnings on/off. I<val> is usually 1 for on, but higher values +trigger additional warnings. See L<"Warnings">. + +=back + +=head1 DESCRIPTION + +B<podchecker> will perform syntax checking of Perl5 POD format documentation. + +Curious/ambitious users are welcome to propose additional features they wish +to see in B<Pod::Checker> and B<podchecker> and verify that the checks are +consistent with L<perlpod>. + +The following checks are currently performed: + +=over 4 + +=item * + +Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences, +and unterminated interior sequences. + +=item * + +Check for proper balancing of C<=begin> and C<=end>. The contents of such +a block are generally ignored, i.e. no syntax checks are performed. + +=item * + +Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>. + +=item * + +Check for same nested interior-sequences (e.g. +C<LE<lt>...LE<lt>...E<gt>...E<gt>>). + +=item * + +Check for malformed or non-existing entities C<EE<lt>...E<gt>>. + +=item * + +Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod> +for details. + +=item * + +Check for unresolved document-internal links. This check may also reveal +misspelled links that seem to be internal links but should be links +to something else. + +=back + +=head1 DIAGNOSTICS + +=head2 Errors + +=over 4 + +=item * empty =headn + +A heading (C<=head1> or C<=head2>) without any text? That ain't no +heading! + +=item * =over on line I<N> without closing =back + +The C<=over> command does not have a corresponding C<=back> before the +next heading (C<=head1> or C<=head2>) or the end of the file. + +=item * =item without previous =over + +=item * =back without previous =over + +An C<=item> or C<=back> command has been found outside a +C<=over>/C<=back> block. + +=item * No argument for =begin + +A C<=begin> command was found that is not followed by the formatter +specification. + +=item * =end without =begin + +A standalone C<=end> command was found. + +=item * Nested =begin's + +There were at least two consecutive C<=begin> commands without +the corresponding C<=end>. Only one C<=begin> may be active at +a time. + +=item * =for without formatter specification + +There is no specification of the formatter after the C<=for> command. + +=item * unresolved internal link I<NAME> + +The given link to I<NAME> does not have a matching node in the current +POD. This also happened when a single word node name is not enclosed in +C<"">. + +=item * Unknown command "I<CMD>" + +An invalid POD command has been found. Valid are C<=head1>, C<=head2>, +C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, +C<=for>, C<=pod>, C<=cut> + +=item * Unknown interior-sequence "I<SEQ>" + +An invalid markup command has been encountered. Valid are: +C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>, +C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>, +C<ZE<lt>E<gt>> + +=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt> + +Two nested identical markup commands have been found. Generally this +does not make sense. + +=item * garbled entity I<STRING> + +The I<STRING> found cannot be interpreted as a character entity. + +=item * Entity number out of range + +An entity specified by number (dec, hex, oct) is out of range (1-255). + +=item * malformed link LE<lt>E<gt> + +The link found cannot be parsed because it does not conform to the +syntax described in L<perlpod>. + +=item * nonempty ZE<lt>E<gt> + +The C<ZE<lt>E<gt>> sequence is supposed to be empty. + +=item * empty XE<lt>E<gt> + +The index entry specified contains nothing but whitespace. + +=item * Spurious text after =pod / =cut + +The commands C<=pod> and C<=cut> do not take any arguments. + +=item * Spurious character(s) after =back + +The C<=back> command does not take any arguments. + +=back + +=head2 Warnings + +These may not necessarily cause trouble, but indicate mediocre style. + +=over 4 + +=item * multiple occurrence of link target I<name> + +The POD file has some C<=item> and/or C<=head> commands that have +the same text. Potential hyperlinks to such a text cannot be unique then. +This warning is printed only with warning level greater than one. + +=item * line containing nothing but whitespace in paragraph + +There is some whitespace on a seemingly empty line. POD is very sensitive +to such things, so this is flagged. B<vi> users switch on the B<list> +option to avoid this problem. + +=begin _disabled_ + +=item * file does not start with =head + +The file starts with a different POD directive than head. +This is most probably something you do not want. + +=end _disabled_ + +=item * previous =item has no contents + +There is a list C<=item> right above the flagged line that has no +text contents. You probably want to delete empty items. + +=item * preceding non-item paragraph(s) + +A list introduced by C<=over> starts with a text or verbatim paragraph, +but continues with C<=item>s. Move the non-item paragraph out of the +C<=over>/C<=back> block. + +=item * =item type mismatch (I<one> vs. I<two>) + +A list started with e.g. a bullet-like C<=item> and continued with a +numbered one. This is obviously inconsistent. For most translators the +type of the I<first> C<=item> determines the type of the list. + +=item * I<N> unescaped C<E<lt>E<gt>> in paragraph + +Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>> +can potentially cause errors as they could be misinterpreted as +markup commands. This is only printed when the -warnings level is +greater than 1. + +=item * Unknown entity + +A character entity was found that does not belong to the standard +ISO set or the POD specials C<verbar> and C<sol>. + +=item * No items in =over + +The list opened with C<=over> does not contain any items. + +=item * No argument for =item + +C<=item> without any parameters is deprecated. It should either be followed +by C<*> to indicate an unordered list, by a number (optionally followed +by a dot) to indicate an ordered (numbered) list or simple text for a +definition list. + +=item * empty section in previous paragraph + +The previous section (introduced by a C<=head> command) does not contain +any text. This usually indicates that something is missing. Note: A +C<=head1> followed immediately by C<=head2> does not trigger this warning. + +=item * Verbatim paragraph in NAME section + +The NAME section (C<=head1 NAME>) should consist of a single paragraph +with the script/module name, followed by a dash `-' and a very short +description of what the thing is good for. + +=item * =headI<n> without preceding higher level + +For example if there is a C<=head2> in the POD file prior to a +C<=head1>. + +=back + +=head2 Hyperlinks + +There are some warnings with respect to malformed hyperlinks: + +=over 4 + +=item * ignoring leading/trailing whitespace in link + +There is whitespace at the beginning or the end of the contents of +LE<lt>...E<gt>. + +=item * (section) in '$page' deprecated + +There is a section detected in the page name of LE<lt>...E<gt>, e.g. +C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only. +Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able +to expand this to appropriate code. For links to (builtin) functions, +please say C<LE<lt>perlfunc/mkdirE<gt>>, without (). + +=item * alternative text/node '%s' contains non-escaped | or / + +The characters C<|> and C</> are special in the LE<lt>...E<gt> context. +Although the hyperlink parser does its best to determine which "/" is +text and which is a delimiter in case of doubt, one ought to escape +these literal characters like this: + + / E<sol> + | E<verbar> + +=back + +=head1 RETURN VALUE + +B<podchecker> returns the number of POD syntax errors found or -1 if +there were no POD commands at all found in the file. + +=head1 EXAMPLES + +See L</SYNOPSIS> + +=head1 INTERFACE + +While checking, this module collects document properties, e.g. the nodes +for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>). +POD translators can use this feature to syntax-check and get the nodes in +a first pass before actually starting to convert. This is expensive in terms +of execution time, but allows for very robust conversions. + +Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror> +method to print errors and warnings. The summary output (e.g. +"Pod syntax OK") has been dropped from the module and has been included in +B<podchecker> (the script). This allows users of B<Pod::Checker> to +control completely the output behavior. Users of B<podchecker> (the script) +get the well-known behavior. + +=cut + +############################################################################# + +#use diagnostics; +use Carp qw(croak); +use Exporter; +use Pod::Parser; + +@ISA = qw(Pod::Parser); +@EXPORT = qw(&podchecker); + +my %VALID_COMMANDS = ( + 'pod' => 1, + 'cut' => 1, + 'head1' => 1, + 'head2' => 1, + 'head3' => 1, + 'head4' => 1, + 'over' => 1, + 'back' => 1, + 'item' => 1, + 'for' => 1, + 'begin' => 1, + 'end' => 1, + 'encoding' => 1, +); + +my %VALID_SEQUENCES = ( + 'I' => 1, + 'B' => 1, + 'S' => 1, + 'C' => 1, + 'L' => 1, + 'F' => 1, + 'X' => 1, + 'Z' => 1, + 'E' => 1, +); + +# stolen from HTML::Entities +my %ENTITIES = ( + # Some normal chars that have special meaning in SGML context + amp => '&', # ampersand +'gt' => '>', # greater than +'lt' => '<', # less than + quot => '"', # double quote + + # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML + AElig => 'Æ', # capital AE diphthong (ligature) + Aacute => 'Á', # capital A, acute accent + Acirc => 'Â', # capital A, circumflex accent + Agrave => 'À', # capital A, grave accent + Aring => 'Å', # capital A, ring + Atilde => 'Ã', # capital A, tilde + Auml => 'Ä', # capital A, dieresis or umlaut mark + Ccedil => 'Ç', # capital C, cedilla + ETH => 'Ð', # capital Eth, Icelandic + Eacute => 'É', # capital E, acute accent + Ecirc => 'Ê', # capital E, circumflex accent + Egrave => 'È', # capital E, grave accent + Euml => 'Ë', # capital E, dieresis or umlaut mark + Iacute => 'Í', # capital I, acute accent + Icirc => 'Î', # capital I, circumflex accent + Igrave => 'Ì', # capital I, grave accent + Iuml => 'Ï', # capital I, dieresis or umlaut mark + Ntilde => 'Ñ', # capital N, tilde + Oacute => 'Ó', # capital O, acute accent + Ocirc => 'Ô', # capital O, circumflex accent + Ograve => 'Ò', # capital O, grave accent + Oslash => 'Ø', # capital O, slash + Otilde => 'Õ', # capital O, tilde + Ouml => 'Ö', # capital O, dieresis or umlaut mark + THORN => 'Þ', # capital THORN, Icelandic + Uacute => 'Ú', # capital U, acute accent + Ucirc => 'Û', # capital U, circumflex accent + Ugrave => 'Ù', # capital U, grave accent + Uuml => 'Ü', # capital U, dieresis or umlaut mark + Yacute => 'Ý', # capital Y, acute accent + aacute => 'á', # small a, acute accent + acirc => 'â', # small a, circumflex accent + aelig => 'æ', # small ae diphthong (ligature) + agrave => 'à', # small a, grave accent + aring => 'å', # small a, ring + atilde => 'ã', # small a, tilde + auml => 'ä', # small a, dieresis or umlaut mark + ccedil => 'ç', # small c, cedilla + eacute => 'é', # small e, acute accent + ecirc => 'ê', # small e, circumflex accent + egrave => 'è', # small e, grave accent + eth => 'ð', # small eth, Icelandic + euml => 'ë', # small e, dieresis or umlaut mark + iacute => 'í', # small i, acute accent + icirc => 'î', # small i, circumflex accent + igrave => 'ì', # small i, grave accent + iuml => 'ï', # small i, dieresis or umlaut mark + ntilde => 'ñ', # small n, tilde + oacute => 'ó', # small o, acute accent + ocirc => 'ô', # small o, circumflex accent + ograve => 'ò', # small o, grave accent + oslash => 'ø', # small o, slash + otilde => 'õ', # small o, tilde + ouml => 'ö', # small o, dieresis or umlaut mark + szlig => 'ß', # small sharp s, German (sz ligature) + thorn => 'þ', # small thorn, Icelandic + uacute => 'ú', # small u, acute accent + ucirc => 'û', # small u, circumflex accent + ugrave => 'ù', # small u, grave accent + uuml => 'ü', # small u, dieresis or umlaut mark + yacute => 'ý', # small y, acute accent + yuml => 'ÿ', # small y, dieresis or umlaut mark + + # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) + copy => '©', # copyright sign + reg => '®', # registered sign + nbsp => "\240", # non breaking space + + # Additional ISO-8859/1 entities listed in rfc1866 (section 14) + iexcl => '¡', + cent => '¢', + pound => '£', + curren => '¤', + yen => '¥', + brvbar => '¦', + sect => '§', + uml => '¨', + ordf => 'ª', + laquo => '«', +'not' => '¬', # not is a keyword in perl + shy => '', + macr => '¯', + deg => '°', + plusmn => '±', + sup1 => '¹', + sup2 => '²', + sup3 => '³', + acute => '´', + micro => 'µ', + para => '¶', + middot => '·', + cedil => '¸', + ordm => 'º', + raquo => '»', + frac14 => '¼', + frac12 => '½', + frac34 => '¾', + iquest => '¿', +'times' => '×', # times is a keyword in perl + divide => '÷', + +# some POD special entities + verbar => '|', + sol => '/' +); + +##--------------------------------------------------------------------------- + +##--------------------------------- +## Function definitions begin here +##--------------------------------- + +sub podchecker { + my ($infile, $outfile, %options) = @_; + local $_; + + ## Set defaults + $infile ||= \*STDIN; + $outfile ||= \*STDERR; + + ## Now create a pod checker + my $checker = new Pod::Checker(%options); + + ## Now check the pod document for errors + $checker->parse_from_file($infile, $outfile); + + ## Return the number of errors found + return $checker->num_errors(); +} + +##--------------------------------------------------------------------------- + +##------------------------------- +## Method definitions begin here +##------------------------------- + +################################## + +=over 4 + +=item C<Pod::Checker-E<gt>new( %options )> + +Return a reference to a new Pod::Checker object that inherits from +Pod::Parser and is used for calling the required methods later. The +following options are recognized: + +C<-warnings =E<gt> num> + Print warnings if C<num> is true. The higher the value of C<num>, +the more warnings are printed. Currently there are only levels 1 and 2. + +C<-quiet =E<gt> num> + If C<num> is true, do not print any errors/warnings. This is useful +when Pod::Checker is used to munge POD code into plain text from within +POD formatters. + +=cut + +## sub new { +## my $this = shift; +## my $class = ref($this) || $this; +## my %params = @_; +## my $self = {%params}; +## bless $self, $class; +## $self->initialize(); +## return $self; +## } + +sub initialize { + my $self = shift; + ## Initialize number of errors, and setup an error function to + ## increment this number and then print to the designated output. + $self->{_NUM_ERRORS} = 0; + $self->{_NUM_WARNINGS} = 0; + $self->{-quiet} ||= 0; + # set the error handling subroutine + $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror'); + $self->{_commands} = 0; # total number of POD commands encountered + $self->{_list_stack} = []; # stack for nested lists + $self->{_have_begin} = ''; # stores =begin + $self->{_links} = []; # stack for internal hyperlinks + $self->{_nodes} = []; # stack for =head/=item nodes + $self->{_index} = []; # text in X<> + # print warnings? + $self->{-warnings} = 1 unless(defined $self->{-warnings}); + $self->{_current_head1} = ''; # the current =head1 block + $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings}); +} + +################################## + +=item C<$checker-E<gt>poderror( @args )> + +=item C<$checker-E<gt>poderror( {%opts}, @args )> + +Internal method for printing errors and warnings. If no options are +given, simply prints "@_". The following options are recognized and used +to form the output: + + -msg + +A message to print prior to C<@args>. + + -line + +The line number the error occurred in. + + -file + +The file (name) the error occurred in. + + -severity + +The error level, should be 'WARNING' or 'ERROR'. + +=cut + +# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args ) +sub poderror { + my $self = shift; + my %opts = (ref $_[0]) ? %{shift()} : (); + + ## Retrieve options + chomp( my $msg = ($opts{-msg} || '')."@_" ); + my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ''; + my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ''; + unless (exists $opts{-severity}) { + ## See if can find severity in message prefix + $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// ); + } + my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ''; + + ## Increment error count and print message " + ++($self->{_NUM_ERRORS}) + if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); + ++($self->{_NUM_WARNINGS}) + if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING')); + unless($self->{-quiet}) { + my $out_fh = $self->output_handle() || \*STDERR; + print $out_fh ($severity, $msg, $line, $file, "\n") + if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); + } +} + +################################## + +=item C<$checker-E<gt>num_errors()> + +Set (if argument specified) and retrieve the number of errors found. + +=cut + +sub num_errors { + return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS}; +} + +################################## + +=item C<$checker-E<gt>num_warnings()> + +Set (if argument specified) and retrieve the number of warnings found. + +=cut + +sub num_warnings { + return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS}; +} + +################################## + +=item C<$checker-E<gt>name()> + +Set (if argument specified) and retrieve the canonical name of POD as +found in the C<=head1 NAME> section. + +=cut + +sub name { + return (@_ > 1 && $_[1]) ? + ($_[0]->{-name} = $_[1]) : $_[0]->{-name}; +} + +################################## + +=item C<$checker-E<gt>node()> + +Add (if argument specified) and retrieve the nodes (as defined by C<=headX> +and C<=item>) of the current POD. The nodes are returned in the order of +their occurrence. They consist of plain text, each piece of whitespace is +collapsed to a single blank. + +=cut + +sub node { + my ($self,$text) = @_; + if(defined $text) { + $text =~ s/\s+$//s; # strip trailing whitespace + $text =~ s/\s+/ /gs; # collapse whitespace + # add node, order important! + push(@{$self->{_nodes}}, $text); + # keep also a uniqueness counter + $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); + return $text; + } + @{$self->{_nodes}}; +} + +################################## + +=item C<$checker-E<gt>idx()> + +Add (if argument specified) and retrieve the index entries (as defined by +C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece +of whitespace is collapsed to a single blank. + +=cut + +# set/return index entries of current POD +sub idx { + my ($self,$text) = @_; + if(defined $text) { + $text =~ s/\s+$//s; # strip trailing whitespace + $text =~ s/\s+/ /gs; # collapse whitespace + # add node, order important! + push(@{$self->{_index}}, $text); + # keep also a uniqueness counter + $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); + return $text; + } + @{$self->{_index}}; +} + +################################## + +=item C<$checker-E<gt>hyperlink()> + +Add (if argument specified) and retrieve the hyperlinks (as defined by +C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line +number and C<Pod::Hyperlink> object. + +=back + +=cut + +# set/return hyperlinks of the current POD +sub hyperlink { + my $self = shift; + if($_[0]) { + push(@{$self->{_links}}, $_[0]); + return $_[0]; + } + @{$self->{_links}}; +} + +## overrides for Pod::Parser + +sub end_pod { + ## Do some final checks and + ## print the number of errors found + my $self = shift; + my $infile = $self->input_file(); + + if(@{$self->{_list_stack}}) { + my $list; + while(($list = $self->_close_list('EOF',$infile)) && + $list->indent() ne 'auto') { + $self->poderror({ -line => 'EOF', -file => $infile, + -severity => 'ERROR', -msg => '=over on line ' . + $list->start() . ' without closing =back' }); + } + } + + # check validity of document internal hyperlinks + # first build the node names from the paragraph text + my %nodes; + foreach($self->node()) { + $nodes{$_} = 1; + if(/^(\S+)\s+\S/) { + # we have more than one word. Use the first as a node, too. + # This is used heavily in perlfunc.pod + $nodes{$1} ||= 2; # derived node + } + } + foreach($self->idx()) { + $nodes{$_} = 3; # index node + } + foreach($self->hyperlink()) { + my ($line,$link) = @$_; + # _TODO_ what if there is a link to the page itself by the name, + # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION"> + if($link->node() && !$link->page() && $link->type() ne 'hyperlink') { + my $node = $self->_check_ptree($self->parse_text($link->node(), + $line), $line, $infile, 'L'); + if($node && !$nodes{$node}) { + $self->poderror({ -line => $line || '', -file => $infile, + -severity => 'ERROR', + -msg => "unresolved internal link '$node'"}); + } + } + } + + # check the internal nodes for uniqueness. This pertains to + # =headX, =item and X<...> + if($self->{-warnings} && $self->{-warnings}>1) { + foreach(grep($self->{_unique_nodes}->{$_} > 1, + keys %{$self->{_unique_nodes}})) { + $self->poderror({ -line => '-', -file => $infile, + -severity => 'WARNING', + -msg => "multiple occurrence of link target '$_'"}); + } + } + + # no POD found here + $self->num_errors(-1) if($self->{_commands} == 0); +} + +# check a POD command directive +sub command { + my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_; + my ($file, $line) = $pod_para->file_line; + ## Check the command syntax + my $arg; # this will hold the command argument + if (! $VALID_COMMANDS{$cmd}) { + $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', + -msg => "Unknown command '$cmd'" }); + } + else { # found a valid command + $self->{_commands}++; # delete this line if below is enabled again + + ##### following check disabled due to strong request + #if(!$self->{_commands}++ && $cmd !~ /^head/) { + # $self->poderror({ -line => $line, -file => $file, + # -severity => 'WARNING', + # -msg => "file does not start with =head" }); + #} + + # check syntax of particular command + if($cmd eq 'over') { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + my $indent = 4; # default + if($arg && $arg =~ /^\s*(\d+)\s*$/) { + $indent = $1; + } + # start a new list + $self->_open_list($indent,$line,$file); + } + elsif($cmd eq 'item') { + # are we in a list? + unless(@{$self->{_list_stack}}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => '=item without previous =over' }); + # auto-open in case we encounter many more + $self->_open_list('auto',$line,$file); + } + my $list = $self->{_list_stack}->[0]; + # check whether the previous item had some contents + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'previous =item has no contents' }); + } + if($list->{_has_par}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'preceding non-item paragraph(s)' }); + delete $list->{_has_par}; + } + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line, $file); + if($arg && $arg =~ /(\S+)/) { + $arg =~ s/[\s\n]+$//; + my $type; + if($arg =~ /^[*]\s*(\S*.*)/) { + $type = 'bullet'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + elsif($arg =~ /^\d+\.?\s+(\S*)/) { + $type = 'number'; + $self->{_list_item_contents} = $1 ? 1 : 0; + $arg = $1; + } + else { + $type = 'definition'; + $self->{_list_item_contents} = 1; + } + my $first = $list->type(); + if($first && $first ne $type) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "=item type mismatch ('$first' vs. '$type')"}); + } + else { # first item + $list->type($type); + } + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'No argument for =item' }); + $arg = ' '; # empty + $self->{_list_item_contents} = 0; + } + # add this item + $list->item($arg); + # remember this node + $self->node($arg); + } + elsif($cmd eq 'back') { + # check if we have an open list + unless(@{$self->{_list_stack}}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => '=back without previous =over' }); + } + else { + # check for spurious characters + $arg = $self->interpolate_and_check($paragraph, $line,$file); + if($arg && $arg =~ /\S/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => 'Spurious character(s) after =back' }); + } + # close list + my $list = $self->_close_list($line,$file); + # check for empty lists + if(!$list->item() && $self->{-warnings}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'No items in =over (at line ' . + $list->start() . ') / =back list'}); + } + } + } + elsif($cmd =~ /^head(\d+)/) { + my $hnum = $1; + $self->{"_have_head_$hnum"}++; # count head types + if($hnum > 1 && !$self->{'_have_head_'.($hnum -1)}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "=head$hnum without preceding higher level"}); + } + # check whether the previous =head section had some contents + if(defined $self->{_commands_in_head} && + $self->{_commands_in_head} == 0 && + defined $self->{_last_head} && + $self->{_last_head} >= $hnum) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'empty section in previous paragraph'}); + } + $self->{_commands_in_head} = -1; + $self->{_last_head} = $hnum; + # check if there is an open list + if(@{$self->{_list_stack}}) { + my $list; + while(($list = $self->_close_list($line,$file)) && + $list->indent() ne 'auto') { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => '=over on line '. $list->start() . + " without closing =back (at $cmd)" }); + } + } + # remember this node + $arg = $self->interpolate_and_check($paragraph, $line,$file); + $arg =~ s/[\s\n]+$//s; + $self->node($arg); + unless(length($arg)) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "empty =$cmd"}); + } + if($cmd eq 'head1') { + $self->{_current_head1} = $arg; + } else { + $self->{_current_head1} = ''; + } + } + elsif($cmd eq 'begin') { + if($self->{_have_begin}) { + # already have a begin + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => q{Nested =begin's (first at line } . + $self->{_have_begin} . ')'}); + } + else { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + unless($arg && $arg =~ /(\S+)/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => 'No argument for =begin'}); + } + # remember the =begin + $self->{_have_begin} = "$line:$1"; + } + } + elsif($cmd eq 'end') { + if($self->{_have_begin}) { + # close the existing =begin + $self->{_have_begin} = ''; + # check for spurious characters + $arg = $self->interpolate_and_check($paragraph, $line,$file); + # the closing argument is optional + #if($arg && $arg =~ /\S/) { + # $self->poderror({ -line => $line, -file => $file, + # -severity => 'WARNING', + # -msg => "Spurious character(s) after =end" }); + #} + } + else { + # don't have a matching =begin + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => '=end without =begin' }); + } + } + elsif($cmd eq 'for') { + unless($paragraph =~ /\s*(\S+)\s*/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => '=for without formatter specification' }); + } + $arg = ''; # do not expand paragraph below + } + elsif($cmd =~ /^(pod|cut)$/) { + # check for argument + $arg = $self->interpolate_and_check($paragraph, $line,$file); + if($arg && $arg =~ /(\S+)/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => "Spurious text after =$cmd"}); + } + } + $self->{_commands_in_head}++; + ## Check the interior sequences in the command-text + $self->interpolate_and_check($paragraph, $line,$file) + unless(defined $arg); + } +} + +sub _open_list +{ + my ($self,$indent,$line,$file) = @_; + my $list = Pod::List->new( + -indent => $indent, + -start => $line, + -file => $file); + unshift(@{$self->{_list_stack}}, $list); + undef $self->{_list_item_contents}; + $list; +} + +sub _close_list +{ + my ($self,$line,$file) = @_; + my $list = shift(@{$self->{_list_stack}}); + if(defined $self->{_list_item_contents} && + $self->{_list_item_contents} == 0) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'previous =item has no contents' }); + } + undef $self->{_list_item_contents}; + $list; +} + +# process a block of some text +sub interpolate_and_check { + my ($self, $paragraph, $line, $file) = @_; + ## Check the interior sequences in the command-text + # and return the text + $self->_check_ptree( + $self->parse_text($paragraph,$line), $line, $file, ''); +} + +sub _check_ptree { + my ($self,$ptree,$line,$file,$nestlist) = @_; + local($_); + my $text = ''; + # process each node in the parse tree + foreach(@$ptree) { + # regular text chunk + unless(ref) { + # count the unescaped angle brackets + # complain only when warning level is greater than 1 + if($self->{-warnings} && $self->{-warnings}>1) { + my $count; + if($count = tr/<>/<>/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "$count unescaped <> in paragraph" }); + } + } + $text .= $_; + next; + } + # have an interior sequence + my $cmd = $_->cmd_name(); + my $contents = $_->parse_tree(); + ($file,$line) = $_->file_line(); + # check for valid tag + if (! $VALID_SEQUENCES{$cmd}) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => qq(Unknown interior-sequence '$cmd')}); + # expand it anyway + $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + next; + } + if($nestlist =~ /$cmd/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => "nested commands $cmd<...$cmd<...>...>"}); + # _TODO_ should we add the contents anyway? + # expand it anyway, see below + } + if($cmd eq 'E') { + # preserve entities + if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => 'garbled entity ' . $_->raw_text()}); + next; + } + my $ent = $$contents[0]; + my $val; + if($ent =~ /^0x[0-9a-f]+$/i) { + # hexadec entity + $val = hex($ent); + } + elsif($ent =~ /^0\d+$/) { + # octal + $val = oct($ent); + } + elsif($ent =~ /^\d+$/) { + # numeric entity + $val = $ent; + } + if(defined $val) { + if($val>0 && $val<256) { + $text .= chr($val); + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => 'Entity number out of range ' . $_->raw_text()}); + } + } + elsif($ENTITIES{$ent}) { + # known ISO entity + $text .= $ENTITIES{$ent}; + } + else { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'Unknown entity ' . $_->raw_text()}); + $text .= "E<$ent>"; + } + } + elsif($cmd eq 'L') { + # try to parse the hyperlink + my $link = Pod::Hyperlink->new($contents->raw_text()); + unless(defined $link) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => 'malformed link ' . $_->raw_text() ." : $@"}); + next; + } + $link->line($line); # remember line + if($self->{-warnings}) { + foreach my $w ($link->warning()) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => $w }); + } + } + # check the link text + $text .= $self->_check_ptree($self->parse_text($link->text(), + $line), $line, $file, "$nestlist$cmd"); + # remember link + $self->hyperlink([$line,$link]); + } + elsif($cmd =~ /[BCFIS]/) { + # add the guts + $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + } + elsif($cmd eq 'Z') { + if(length($contents->raw_text())) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => 'Nonempty Z<>'}); + } + } + elsif($cmd eq 'X') { + my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); + if($idx =~ /^\s*$/s) { + $self->poderror({ -line => $line, -file => $file, + -severity => 'ERROR', + -msg => 'Empty X<>'}); + } + else { + # remember this node + $self->idx($idx); + } + } + else { + # not reached + croak 'internal error'; + } + } + $text; +} + +# process a block of verbatim text +sub verbatim { + ## Nothing particular to check + my ($self, $paragraph, $line_num, $pod_para) = @_; + + $self->_preproc_par($paragraph); + + if($self->{_current_head1} eq 'NAME') { + my ($file, $line) = $pod_para->file_line; + $self->poderror({ -line => $line, -file => $file, + -severity => 'WARNING', + -msg => 'Verbatim paragraph in NAME section' }); + } +} + +# process a block of regular text +sub textblock { + my ($self, $paragraph, $line_num, $pod_para) = @_; + my ($file, $line) = $pod_para->file_line; + + $self->_preproc_par($paragraph); + + # skip this paragraph if in a =begin block + unless($self->{_have_begin}) { + my $block = $self->interpolate_and_check($paragraph, $line,$file); + if($self->{_current_head1} eq 'NAME') { + if($block =~ /^\s*(\S+?)\s*[,-]/) { + # this is the canonical name + $self->{-name} = $1 unless(defined $self->{-name}); + } + } + } +} + +sub _preproc_par +{ + my $self = shift; + $_[0] =~ s/[\s\n]+$//; + if($_[0]) { + $self->{_commands_in_head}++; + $self->{_list_item_contents}++ if(defined $self->{_list_item_contents}); + if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) { + $self->{_list_stack}->[0]->{_has_par} = 1; + } + } +} + +1; + +__END__ + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version), +Marek Rouchal E<lt>marekr@cpan.orgE<gt> + +Based on code for B<Pod::Text::pod2text()> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + diff --git a/ext/Pod-Parser/lib/Pod/Find.pm b/ext/Pod-Parser/lib/Pod/Find.pm new file mode 100644 index 0000000000..8d1103b6a1 --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/Find.pm @@ -0,0 +1,535 @@ +############################################################################# +# Pod/Find.pm -- finds files containing POD documentation +# +# Author: Marek Rouchal <marekr@cpan.org> +# +# Copyright (C) 1999-2000 by Marek Rouchal (and borrowing code +# from Nick Ing-Simmon's PodToHtml). All rights reserved. +# This file is part of "PodParser". Pod::Find is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Find; +use strict; + +use vars qw($VERSION); +$VERSION = '1.35'; ## Current version of this package +require 5.005; ## requires this Perl version or later +use Carp; + +BEGIN { + if ($] < 5.006) { + require Symbol; + import Symbol; + } +} + +############################################################################# + +=head1 NAME + +Pod::Find - find POD documents in directory trees + +=head1 SYNOPSIS + + use Pod::Find qw(pod_find simplify_name); + my %pods = pod_find({ -verbose => 1, -inc => 1 }); + foreach(keys %pods) { + print "found library POD `$pods{$_}' in $_\n"; + } + + print "podname=",simplify_name('a/b/c/mymodule.pod'),"\n"; + + $location = pod_where( { -inc => 1 }, "Pod::Find" ); + +=head1 DESCRIPTION + +B<Pod::Find> provides a set of functions to locate POD files. Note that +no function is exported by default to avoid pollution of your namespace, +so be sure to specify them in the B<use> statement if you need them: + + use Pod::Find qw(pod_find); + +From this version on the typical SCM (software configuration management) +files/directories like RCS, CVS, SCCS, .svn are ignored. + +=cut + +#use diagnostics; +use Exporter; +use File::Spec; +use File::Find; +use Cwd; + +use vars qw(@ISA @EXPORT_OK $VERSION); +@ISA = qw(Exporter); +@EXPORT_OK = qw(&pod_find &simplify_name &pod_where &contains_pod); + +# package global variables +my $SIMPLIFY_RX; + +=head2 C<pod_find( { %opts } , @directories )> + +The function B<pod_find> searches for POD documents in a given set of +files and/or directories. It returns a hash with the file names as keys +and the POD name as value. The POD name is derived from the file name +and its position in the directory tree. + +E.g. when searching in F<$HOME/perl5lib>, the file +F<$HOME/perl5lib/MyModule.pm> would get the POD name I<MyModule>, +whereas F<$HOME/perl5lib/Myclass/Subclass.pm> would be +I<Myclass::Subclass>. The name information can be used for POD +translators. + +Only text files containing at least one valid POD command are found. + +A warning is printed if more than one POD file with the same POD name +is found, e.g. F<CPAN.pm> in different directories. This usually +indicates duplicate occurrences of modules in the I<@INC> search path. + +B<OPTIONS> The first argument for B<pod_find> may be a hash reference +with options. The rest are either directories that are searched +recursively or files. The POD names of files are the plain basenames +with any Perl-like extension (.pm, .pl, .pod) stripped. + +=over 4 + +=item C<-verbose =E<gt> 1> + +Print progress information while scanning. + +=item C<-perl =E<gt> 1> + +Apply Perl-specific heuristics to find the correct PODs. This includes +stripping Perl-like extensions, omitting subdirectories that are numeric +but do I<not> match the current Perl interpreter's version id, suppressing +F<site_perl> as a module hierarchy name etc. + +=item C<-script =E<gt> 1> + +Search for PODs in the current Perl interpreter's installation +B<scriptdir>. This is taken from the local L<Config|Config> module. + +=item C<-inc =E<gt> 1> + +Search for PODs in the current Perl interpreter's I<@INC> paths. This +automatically considers paths specified in the C<PERL5LIB> environment +as this is included in I<@INC> by the Perl interpreter itself. + +=back + +=cut + +# return a hash of the POD files found +# first argument may be a hashref (options), +# rest is a list of directories to search recursively +sub pod_find +{ + my %opts; + if(ref $_[0]) { + %opts = %{shift()}; + } + + $opts{-verbose} ||= 0; + $opts{-perl} ||= 0; + + my (@search) = @_; + + if($opts{-script}) { + require Config; + push(@search, $Config::Config{scriptdir}) + if -d $Config::Config{scriptdir}; + $opts{-perl} = 1; + } + + if($opts{-inc}) { + if ($^O eq 'MacOS') { + # tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS + my @new_INC = @INC; + for (@new_INC) { + if ( $_ eq '.' ) { + $_ = ':'; + } elsif ( $_ =~ s{^((?:\.\./)+)}{':' x (length($1)/3)}e ) { + $_ = ':'. $_; + } else { + $_ =~ s{^\./}{:}; + } + } + push(@search, grep($_ ne File::Spec->curdir, @new_INC)); + } else { + push(@search, grep($_ ne File::Spec->curdir, @INC)); + } + + $opts{-perl} = 1; + } + + if($opts{-perl}) { + require Config; + # this code simplifies the POD name for Perl modules: + # * remove "site_perl" + # * remove e.g. "i586-linux" (from 'archname') + # * remove e.g. 5.00503 + # * remove pod/ if followed by *.pod (e.g. in pod/perlfunc.pod) + + # Mac OS: + # * remove ":?site_perl:" + # * remove :?pod: if followed by *.pod (e.g. in :pod:perlfunc.pod) + + if ($^O eq 'MacOS') { + $SIMPLIFY_RX = + qq!^(?i:\:?site_perl\:|\:?pod\:(?=.*?\\.pod\\z))*!; + } else { + $SIMPLIFY_RX = + qq!^(?i:site(_perl)?/|\Q$Config::Config{archname}\E/|\\d+\\.\\d+([_.]?\\d+)?/|pod/(?=.*?\\.pod\\z))*!; + } + } + + my %dirs_visited; + my %pods; + my %names; + my $pwd = cwd(); + + foreach my $try (@search) { + unless(File::Spec->file_name_is_absolute($try)) { + # make path absolute + $try = File::Spec->catfile($pwd,$try); + } + # simplify path + # on VMS canonpath will vmsify:[the.path], but File::Find::find + # wants /unixy/paths + $try = File::Spec->canonpath($try) if ($^O ne 'VMS'); + $try = VMS::Filespec::unixify($try) if ($^O eq 'VMS'); + my $name; + if(-f $try) { + if($name = _check_and_extract_name($try, $opts{-verbose})) { + _check_for_duplicates($try, $name, \%names, \%pods); + } + next; + } + my $root_rx = $^O eq 'MacOS' ? qq!^\Q$try\E! : qq!^\Q$try\E/!; + File::Find::find( sub { + my $item = $File::Find::name; + if(-d) { + if($item =~ m{/(?:RCS|CVS|SCCS|\.svn)$}) { + $File::Find::prune = 1; + return; + } + elsif($dirs_visited{$item}) { + warn "Directory '$item' already seen, skipping.\n" + if($opts{-verbose}); + $File::Find::prune = 1; + return; + } + else { + $dirs_visited{$item} = 1; + } + if($opts{-perl} && /^(\d+\.[\d_]+)\z/s && eval "$1" != $]) { + $File::Find::prune = 1; + warn "Perl $] version mismatch on $_, skipping.\n" + if($opts{-verbose}); + } + return; + } + if($name = _check_and_extract_name($item, $opts{-verbose}, $root_rx)) { + _check_for_duplicates($item, $name, \%names, \%pods); + } + }, $try); # end of File::Find::find + } + chdir $pwd; + return %pods; +} + +sub _check_for_duplicates { + my ($file, $name, $names_ref, $pods_ref) = @_; + if($$names_ref{$name}) { + warn "Duplicate POD found (shadowing?): $name ($file)\n"; + warn ' Already seen in ', + join(' ', grep($$pods_ref{$_} eq $name, keys %$pods_ref)),"\n"; + } + else { + $$names_ref{$name} = 1; + } + return $$pods_ref{$file} = $name; +} + +sub _check_and_extract_name { + my ($file, $verbose, $root_rx) = @_; + + # check extension or executable flag + # this involves testing the .bat extension on Win32! + unless(-f $file && -T $file && ($file =~ /\.(pod|pm|plx?)\z/i || -x $file )) { + return; + } + + return unless contains_pod($file,$verbose); + + # strip non-significant path components + # TODO what happens on e.g. Win32? + my $name = $file; + if(defined $root_rx) { + $name =~ s/$root_rx//s; + $name =~ s/$SIMPLIFY_RX//s if(defined $SIMPLIFY_RX); + } + else { + if ($^O eq 'MacOS') { + $name =~ s/^.*://s; + } else { + $name =~ s{^.*/}{}s; + } + } + _simplify($name); + $name =~ s{/+}{::}g; + if ($^O eq 'MacOS') { + $name =~ s{:+}{::}g; # : -> :: + } else { + $name =~ s{/+}{::}g; # / -> :: + } + return $name; +} + +=head2 C<simplify_name( $str )> + +The function B<simplify_name> is equivalent to B<basename>, but also +strips Perl-like extensions (.pm, .pl, .pod) and extensions like +F<.bat>, F<.cmd> on Win32 and OS/2, or F<.com> on VMS, respectively. + +=cut + +# basic simplification of the POD name: +# basename & strip extension +sub simplify_name { + my ($str) = @_; + # remove all path components + if ($^O eq 'MacOS') { + $str =~ s/^.*://s; + } else { + $str =~ s{^.*/}{}s; + } + _simplify($str); + return $str; +} + +# internal sub only +sub _simplify { + # strip Perl's own extensions + $_[0] =~ s/\.(pod|pm|plx?)\z//i; + # strip meaningless extensions on Win32 and OS/2 + $_[0] =~ s/\.(bat|exe|cmd)\z//i if($^O =~ /mswin|os2/i); + # strip meaningless extensions on VMS + $_[0] =~ s/\.(com)\z//i if($^O eq 'VMS'); +} + +# contribution from Tim Jenness <t.jenness@jach.hawaii.edu> + +=head2 C<pod_where( { %opts }, $pod )> + +Returns the location of a pod document given a search directory +and a module (e.g. C<File::Find>) or script (e.g. C<perldoc>) name. + +Options: + +=over 4 + +=item C<-inc =E<gt> 1> + +Search @INC for the pod and also the C<scriptdir> defined in the +L<Config|Config> module. + +=item C<-dirs =E<gt> [ $dir1, $dir2, ... ]> + +Reference to an array of search directories. These are searched in order +before looking in C<@INC> (if B<-inc>). Current directory is used if +none are specified. + +=item C<-verbose =E<gt> 1> + +List directories as they are searched + +=back + +Returns the full path of the first occurrence to the file. +Package names (eg 'A::B') are automatically converted to directory +names in the selected directory. (eg on unix 'A::B' is converted to +'A/B'). Additionally, '.pm', '.pl' and '.pod' are appended to the +search automatically if required. + +A subdirectory F<pod/> is also checked if it exists in any of the given +search directories. This ensures that e.g. L<perlfunc|perlfunc> is +found. + +It is assumed that if a module name is supplied, that that name +matches the file name. Pods are not opened to check for the 'NAME' +entry. + +A check is made to make sure that the file that is found does +contain some pod documentation. + +=cut + +sub pod_where { + + # default options + my %options = ( + '-inc' => 0, + '-verbose' => 0, + '-dirs' => [ File::Spec->curdir ], + ); + + # Check for an options hash as first argument + if (defined $_[0] && ref($_[0]) eq 'HASH') { + my $opt = shift; + + # Merge default options with supplied options + %options = (%options, %$opt); + } + + # Check usage + carp 'Usage: pod_where({options}, $pod)' unless (scalar(@_)); + + # Read argument + my $pod = shift; + + # Split on :: and then join the name together using File::Spec + my @parts = split (/::/, $pod); + + # Get full directory list + my @search_dirs = @{ $options{'-dirs'} }; + + if ($options{'-inc'}) { + + require Config; + + # Add @INC + if ($^O eq 'MacOS' && $options{'-inc'}) { + # tolerate '.', './some_dir' and '(../)+some_dir' on Mac OS + my @new_INC = @INC; + for (@new_INC) { + if ( $_ eq '.' ) { + $_ = ':'; + } elsif ( $_ =~ s{^((?:\.\./)+)}{':' x (length($1)/3)}e ) { + $_ = ':'. $_; + } else { + $_ =~ s{^\./}{:}; + } + } + push (@search_dirs, @new_INC); + } elsif ($options{'-inc'}) { + push (@search_dirs, @INC); + } + + # Add location of pod documentation for perl man pages (eg perlfunc) + # This is a pod directory in the private install tree + #my $perlpoddir = File::Spec->catdir($Config::Config{'installprivlib'}, + # 'pod'); + #push (@search_dirs, $perlpoddir) + # if -d $perlpoddir; + + # Add location of binaries such as pod2text + push (@search_dirs, $Config::Config{'scriptdir'}) + if -d $Config::Config{'scriptdir'}; + } + + warn 'Search path is: '.join(' ', @search_dirs)."\n" + if $options{'-verbose'}; + + # Loop over directories + Dir: foreach my $dir ( @search_dirs ) { + + # Don't bother if can't find the directory + if (-d $dir) { + warn "Looking in directory $dir\n" + if $options{'-verbose'}; + + # Now concatenate this directory with the pod we are searching for + my $fullname = File::Spec->catfile($dir, @parts); + warn "Filename is now $fullname\n" + if $options{'-verbose'}; + + # Loop over possible extensions + foreach my $ext ('', '.pod', '.pm', '.pl') { + my $fullext = $fullname . $ext; + if (-f $fullext && + contains_pod($fullext, $options{'-verbose'}) ) { + warn "FOUND: $fullext\n" if $options{'-verbose'}; + return $fullext; + } + } + } else { + warn "Directory $dir does not exist\n" + if $options{'-verbose'}; + next Dir; + } + # for some strange reason the path on MacOS/darwin/cygwin is + # 'pods' not 'pod' + # this could be the case also for other systems that + # have a case-tolerant file system, but File::Spec + # does not recognize 'darwin' yet. And cygwin also has "pods", + # but is not case tolerant. Oh well... + if((File::Spec->case_tolerant || $^O =~ /macos|darwin|cygwin/i) + && -d File::Spec->catdir($dir,'pods')) { + $dir = File::Spec->catdir($dir,'pods'); + redo Dir; + } + if(-d File::Spec->catdir($dir,'pod')) { + $dir = File::Spec->catdir($dir,'pod'); + redo Dir; + } + } + # No match; + return; +} + +=head2 C<contains_pod( $file , $verbose )> + +Returns true if the supplied filename (not POD module) contains some pod +information. + +=cut + +sub contains_pod { + my $file = shift; + my $verbose = 0; + $verbose = shift if @_; + + # check for one line of POD + my $podfh; + if ($] < 5.006) { + $podfh = gensym(); + } + + unless(open($podfh,"<$file")) { + warn "Error: $file is unreadable: $!\n"; + return; + } + + local $/ = undef; + my $pod = <$podfh>; + close($podfh) || die "Error closing $file: $!\n"; + unless($pod =~ /^=(head\d|pod|over|item|cut)\b/m) { + warn "No POD in $file, skipping.\n" + if($verbose); + return 0; + } + + return 1; +} + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Marek Rouchal E<lt>marekr@cpan.orgE<gt>, +heavily borrowing code from Nick Ing-Simmons' PodToHtml. + +Tim Jenness E<lt>t.jenness@jach.hawaii.eduE<gt> provided +C<pod_where> and C<contains_pod>. + +=head1 SEE ALSO + +L<Pod::Parser>, L<Pod::Checker>, L<perldoc> + +=cut + +1; + diff --git a/ext/Pod-Parser/lib/Pod/InputObjects.pm b/ext/Pod-Parser/lib/Pod/InputObjects.pm new file mode 100644 index 0000000000..13a86188bc --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/InputObjects.pm @@ -0,0 +1,936 @@ +############################################################################# +# Pod/InputObjects.pm -- package which defines objects for input streams +# and paragraphs and commands when parsing POD docs. +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::InputObjects; +use strict; + +use vars qw($VERSION); +$VERSION = '1.31'; ## Current version of this package +require 5.005; ## requires this Perl version or later + +############################################################################# + +=head1 NAME + +Pod::InputObjects - objects representing POD input paragraphs, commands, etc. + +=head1 SYNOPSIS + + use Pod::InputObjects; + +=head1 REQUIRES + +perl5.004, Carp + +=head1 EXPORTS + +Nothing. + +=head1 DESCRIPTION + +This module defines some basic input objects used by B<Pod::Parser> when +reading and parsing POD text from an input source. The following objects +are defined: + +=over 4 + +=begin __PRIVATE__ + +=item package B<Pod::InputSource> + +An object corresponding to a source of POD input text. It is mostly a +wrapper around a filehandle or C<IO::Handle>-type object (or anything +that implements the C<getline()> method) which keeps track of some +additional information relevant to the parsing of PODs. + +=end __PRIVATE__ + +=item package B<Pod::Paragraph> + +An object corresponding to a paragraph of POD input text. It may be a +plain paragraph, a verbatim paragraph, or a command paragraph (see +L<perlpod>). + +=item package B<Pod::InteriorSequence> + +An object corresponding to an interior sequence command from the POD +input text (see L<perlpod>). + +=item package B<Pod::ParseTree> + +An object corresponding to a tree of parsed POD text. Each "node" in +a parse-tree (or I<ptree>) is either a text-string or a reference to +a B<Pod::InteriorSequence> object. The nodes appear in the parse-tree +in the order in which they were parsed from left-to-right. + +=back + +Each of these input objects are described in further detail in the +sections which follow. + +=cut + +############################################################################# + +package Pod::InputSource; + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<Pod::InputSource> + +This object corresponds to an input source or stream of POD +documentation. When parsing PODs, it is necessary to associate and store +certain context information with each input source. All of this +information is kept together with the stream itself in one of these +C<Pod::InputSource> objects. Each such object is merely a wrapper around +an C<IO::Handle> object of some kind (or at least something that +implements the C<getline()> method). They have the following +methods/attributes: + +=end __PRIVATE__ + +=cut + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<new()> + + my $pod_input1 = Pod::InputSource->new(-handle => $filehandle); + my $pod_input2 = new Pod::InputSource(-handle => $filehandle, + -name => $name); + my $pod_input3 = new Pod::InputSource(-handle => \*STDIN); + my $pod_input4 = Pod::InputSource->new(-handle => \*STDIN, + -name => "(STDIN)"); + +This is a class method that constructs a C<Pod::InputSource> object and +returns a reference to the new input source object. It takes one or more +keyword arguments in the form of a hash. The keyword C<-handle> is +required and designates the corresponding input handle. The keyword +C<-name> is optional and specifies the name associated with the input +handle (typically a file name). + +=end __PRIVATE__ + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. Note that we default + ## certain values by specifying them *before* the arguments passed. + ## If they are in the argument list, they will override the defaults. + my $self = { -name => '(unknown)', + -handle => undef, + -was_cutting => 0, + @_ }; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<name()> + + my $filename = $pod_input->name(); + $pod_input->name($new_filename_to_use); + +This method gets/sets the name of the input source (usually a filename). +If no argument is given, it returns a string containing the name of +the input source; otherwise it sets the name of the input source to the +contents of the given argument. + +=end __PRIVATE__ + +=cut + +sub name { + (@_ > 1) and $_[0]->{'-name'} = $_[1]; + return $_[0]->{'-name'}; +} + +## allow 'filename' as an alias for 'name' +*filename = \&name; + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<handle()> + + my $handle = $pod_input->handle(); + +Returns a reference to the handle object from which input is read (the +one used to contructed this input source object). + +=end __PRIVATE__ + +=cut + +sub handle { + return $_[0]->{'-handle'}; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head2 B<was_cutting()> + + print "Yes.\n" if ($pod_input->was_cutting()); + +The value of the C<cutting> state (that the B<cutting()> method would +have returned) immediately before any input was read from this input +stream. After all input from this stream has been read, the C<cutting> +state is restored to this value. + +=end __PRIVATE__ + +=cut + +sub was_cutting { + (@_ > 1) and $_[0]->{-was_cutting} = $_[1]; + return $_[0]->{-was_cutting}; +} + +##--------------------------------------------------------------------------- + +############################################################################# + +package Pod::Paragraph; + +##--------------------------------------------------------------------------- + +=head1 B<Pod::Paragraph> + +An object representing a paragraph of POD input text. +It has the following methods/attributes: + +=cut + +##--------------------------------------------------------------------------- + +=head2 Pod::Paragraph-E<gt>B<new()> + + my $pod_para1 = Pod::Paragraph->new(-text => $text); + my $pod_para2 = Pod::Paragraph->new(-name => $cmd, + -text => $text); + my $pod_para3 = new Pod::Paragraph(-text => $text); + my $pod_para4 = new Pod::Paragraph(-name => $cmd, + -text => $text); + my $pod_para5 = Pod::Paragraph->new(-name => $cmd, + -text => $text, + -file => $filename, + -line => $line_number); + +This is a class method that constructs a C<Pod::Paragraph> object and +returns a reference to the new paragraph object. It may be given one or +two keyword arguments. The C<-text> keyword indicates the corresponding +text of the POD paragraph. The C<-name> keyword indicates the name of +the corresponding POD command, such as C<head1> or C<item> (it should +I<not> contain the C<=> prefix); this is needed only if the POD +paragraph corresponds to a command paragraph. The C<-file> and C<-line> +keywords indicate the filename and line number corresponding to the +beginning of the paragraph + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. Note that we default + ## certain values by specifying them *before* the arguments passed. + ## If they are in the argument list, they will override the defaults. + my $self = { + -name => undef, + -text => (@_ == 1) ? shift : undef, + -file => '<unknown-file>', + -line => 0, + -prefix => '=', + -separator => ' ', + -ptree => [], + @_ + }; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_para-E<gt>B<cmd_name()> + + my $para_cmd = $pod_para->cmd_name(); + +If this paragraph is a command paragraph, then this method will return +the name of the command (I<without> any leading C<=> prefix). + +=cut + +sub cmd_name { + (@_ > 1) and $_[0]->{'-name'} = $_[1]; + return $_[0]->{'-name'}; +} + +## let name() be an alias for cmd_name() +*name = \&cmd_name; + +##--------------------------------------------------------------------------- + +=head2 $pod_para-E<gt>B<text()> + + my $para_text = $pod_para->text(); + +This method will return the corresponding text of the paragraph. + +=cut + +sub text { + (@_ > 1) and $_[0]->{'-text'} = $_[1]; + return $_[0]->{'-text'}; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_para-E<gt>B<raw_text()> + + my $raw_pod_para = $pod_para->raw_text(); + +This method will return the I<raw> text of the POD paragraph, exactly +as it appeared in the input. + +=cut + +sub raw_text { + return $_[0]->{'-text'} unless (defined $_[0]->{'-name'}); + return $_[0]->{'-prefix'} . $_[0]->{'-name'} . + $_[0]->{'-separator'} . $_[0]->{'-text'}; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_para-E<gt>B<cmd_prefix()> + + my $prefix = $pod_para->cmd_prefix(); + +If this paragraph is a command paragraph, then this method will return +the prefix used to denote the command (which should be the string "=" +or "=="). + +=cut + +sub cmd_prefix { + return $_[0]->{'-prefix'}; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_para-E<gt>B<cmd_separator()> + + my $separator = $pod_para->cmd_separator(); + +If this paragraph is a command paragraph, then this method will return +the text used to separate the command name from the rest of the +paragraph (if any). + +=cut + +sub cmd_separator { + return $_[0]->{'-separator'}; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_para-E<gt>B<parse_tree()> + + my $ptree = $pod_parser->parse_text( $pod_para->text() ); + $pod_para->parse_tree( $ptree ); + $ptree = $pod_para->parse_tree(); + +This method will get/set the corresponding parse-tree of the paragraph's text. + +=cut + +sub parse_tree { + (@_ > 1) and $_[0]->{'-ptree'} = $_[1]; + return $_[0]->{'-ptree'}; +} + +## let ptree() be an alias for parse_tree() +*ptree = \&parse_tree; + +##--------------------------------------------------------------------------- + +=head2 $pod_para-E<gt>B<file_line()> + + my ($filename, $line_number) = $pod_para->file_line(); + my $position = $pod_para->file_line(); + +Returns the current filename and line number for the paragraph +object. If called in a list context, it returns a list of two +elements: first the filename, then the line number. If called in +a scalar context, it returns a string containing the filename, followed +by a colon (':'), followed by the line number. + +=cut + +sub file_line { + my @loc = ($_[0]->{'-file'} || '<unknown-file>', + $_[0]->{'-line'} || 0); + return (wantarray) ? @loc : join(':', @loc); +} + +##--------------------------------------------------------------------------- + +############################################################################# + +package Pod::InteriorSequence; + +##--------------------------------------------------------------------------- + +=head1 B<Pod::InteriorSequence> + +An object representing a POD interior sequence command. +It has the following methods/attributes: + +=cut + +##--------------------------------------------------------------------------- + +=head2 Pod::InteriorSequence-E<gt>B<new()> + + my $pod_seq1 = Pod::InteriorSequence->new(-name => $cmd + -ldelim => $delimiter); + my $pod_seq2 = new Pod::InteriorSequence(-name => $cmd, + -ldelim => $delimiter); + my $pod_seq3 = new Pod::InteriorSequence(-name => $cmd, + -ldelim => $delimiter, + -file => $filename, + -line => $line_number); + + my $pod_seq4 = new Pod::InteriorSequence(-name => $cmd, $ptree); + my $pod_seq5 = new Pod::InteriorSequence($cmd, $ptree); + +This is a class method that constructs a C<Pod::InteriorSequence> object +and returns a reference to the new interior sequence object. It should +be given two keyword arguments. The C<-ldelim> keyword indicates the +corresponding left-delimiter of the interior sequence (e.g. 'E<lt>'). +The C<-name> keyword indicates the name of the corresponding interior +sequence command, such as C<I> or C<B> or C<C>. The C<-file> and +C<-line> keywords indicate the filename and line number corresponding +to the beginning of the interior sequence. If the C<$ptree> argument is +given, it must be the last argument, and it must be either string, or +else an array-ref suitable for passing to B<Pod::ParseTree::new> (or +it may be a reference to a Pod::ParseTree object). + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + ## See if first argument has no keyword + if (((@_ <= 2) or (@_ % 2)) and $_[0] !~ /^-\w/) { + ## Yup - need an implicit '-name' before first parameter + unshift @_, '-name'; + } + + ## See if odd number of args + if ((@_ % 2) != 0) { + ## Yup - need an implicit '-ptree' before the last parameter + splice @_, $#_, 0, '-ptree'; + } + + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. Note that we default + ## certain values by specifying them *before* the arguments passed. + ## If they are in the argument list, they will override the defaults. + my $self = { + -name => (@_ == 1) ? $_[0] : undef, + -file => '<unknown-file>', + -line => 0, + -ldelim => '<', + -rdelim => '>', + @_ + }; + + ## Initialize contents if they havent been already + my $ptree = $self->{'-ptree'} || new Pod::ParseTree(); + if ( ref $ptree =~ /^(ARRAY)?$/ ) { + ## We have an array-ref, or a normal scalar. Pass it as an + ## an argument to the ptree-constructor + $ptree = new Pod::ParseTree($1 ? [$ptree] : $ptree); + } + $self->{'-ptree'} = $ptree; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<cmd_name()> + + my $seq_cmd = $pod_seq->cmd_name(); + +The name of the interior sequence command. + +=cut + +sub cmd_name { + (@_ > 1) and $_[0]->{'-name'} = $_[1]; + return $_[0]->{'-name'}; +} + +## let name() be an alias for cmd_name() +*name = \&cmd_name; + +##--------------------------------------------------------------------------- + +## Private subroutine to set the parent pointer of all the given +## children that are interior-sequences to be $self + +sub _set_child2parent_links { + my ($self, @children) = @_; + ## Make sure any sequences know who their parent is + for (@children) { + next unless (length and ref and ref ne 'SCALAR'); + if (UNIVERSAL::isa($_, 'Pod::InteriorSequence') or + UNIVERSAL::can($_, 'nested')) + { + $_->nested($self); + } + } +} + +## Private subroutine to unset child->parent links + +sub _unset_child2parent_links { + my $self = shift; + $self->{'-parent_sequence'} = undef; + my $ptree = $self->{'-ptree'}; + for (@$ptree) { + next unless (length and ref and ref ne 'SCALAR'); + $_->_unset_child2parent_links() + if UNIVERSAL::isa($_, 'Pod::InteriorSequence'); + } +} + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<prepend()> + + $pod_seq->prepend($text); + $pod_seq1->prepend($pod_seq2); + +Prepends the given string or parse-tree or sequence object to the parse-tree +of this interior sequence. + +=cut + +sub prepend { + my $self = shift; + $self->{'-ptree'}->prepend(@_); + _set_child2parent_links($self, @_); + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<append()> + + $pod_seq->append($text); + $pod_seq1->append($pod_seq2); + +Appends the given string or parse-tree or sequence object to the parse-tree +of this interior sequence. + +=cut + +sub append { + my $self = shift; + $self->{'-ptree'}->append(@_); + _set_child2parent_links($self, @_); + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<nested()> + + $outer_seq = $pod_seq->nested || print "not nested"; + +If this interior sequence is nested inside of another interior +sequence, then the outer/parent sequence that contains it is +returned. Otherwise C<undef> is returned. + +=cut + +sub nested { + my $self = shift; + (@_ == 1) and $self->{'-parent_sequence'} = shift; + return $self->{'-parent_sequence'} || undef; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<raw_text()> + + my $seq_raw_text = $pod_seq->raw_text(); + +This method will return the I<raw> text of the POD interior sequence, +exactly as it appeared in the input. + +=cut + +sub raw_text { + my $self = shift; + my $text = $self->{'-name'} . $self->{'-ldelim'}; + for ( $self->{'-ptree'}->children ) { + $text .= (ref $_) ? $_->raw_text : $_; + } + $text .= $self->{'-rdelim'}; + return $text; +} + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<left_delimiter()> + + my $ldelim = $pod_seq->left_delimiter(); + +The leftmost delimiter beginning the argument text to the interior +sequence (should be "<"). + +=cut + +sub left_delimiter { + (@_ > 1) and $_[0]->{'-ldelim'} = $_[1]; + return $_[0]->{'-ldelim'}; +} + +## let ldelim() be an alias for left_delimiter() +*ldelim = \&left_delimiter; + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<right_delimiter()> + +The rightmost delimiter beginning the argument text to the interior +sequence (should be ">"). + +=cut + +sub right_delimiter { + (@_ > 1) and $_[0]->{'-rdelim'} = $_[1]; + return $_[0]->{'-rdelim'}; +} + +## let rdelim() be an alias for right_delimiter() +*rdelim = \&right_delimiter; + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<parse_tree()> + + my $ptree = $pod_parser->parse_text($paragraph_text); + $pod_seq->parse_tree( $ptree ); + $ptree = $pod_seq->parse_tree(); + +This method will get/set the corresponding parse-tree of the interior +sequence's text. + +=cut + +sub parse_tree { + (@_ > 1) and $_[0]->{'-ptree'} = $_[1]; + return $_[0]->{'-ptree'}; +} + +## let ptree() be an alias for parse_tree() +*ptree = \&parse_tree; + +##--------------------------------------------------------------------------- + +=head2 $pod_seq-E<gt>B<file_line()> + + my ($filename, $line_number) = $pod_seq->file_line(); + my $position = $pod_seq->file_line(); + +Returns the current filename and line number for the interior sequence +object. If called in a list context, it returns a list of two +elements: first the filename, then the line number. If called in +a scalar context, it returns a string containing the filename, followed +by a colon (':'), followed by the line number. + +=cut + +sub file_line { + my @loc = ($_[0]->{'-file'} || '<unknown-file>', + $_[0]->{'-line'} || 0); + return (wantarray) ? @loc : join(':', @loc); +} + +##--------------------------------------------------------------------------- + +=head2 Pod::InteriorSequence::B<DESTROY()> + +This method performs any necessary cleanup for the interior-sequence. +If you override this method then it is B<imperative> that you invoke +the parent method from within your own method, otherwise +I<interior-sequence storage will not be reclaimed upon destruction!> + +=cut + +sub DESTROY { + ## We need to get rid of all child->parent pointers throughout the + ## tree so their reference counts will go to zero and they can be + ## garbage-collected + _unset_child2parent_links(@_); +} + +##--------------------------------------------------------------------------- + +############################################################################# + +package Pod::ParseTree; + +##--------------------------------------------------------------------------- + +=head1 B<Pod::ParseTree> + +This object corresponds to a tree of parsed POD text. As POD text is +scanned from left to right, it is parsed into an ordered list of +text-strings and B<Pod::InteriorSequence> objects (in order of +appearance). A B<Pod::ParseTree> object corresponds to this list of +strings and sequences. Each interior sequence in the parse-tree may +itself contain a parse-tree (since interior sequences may be nested). + +=cut + +##--------------------------------------------------------------------------- + +=head2 Pod::ParseTree-E<gt>B<new()> + + my $ptree1 = Pod::ParseTree->new; + my $ptree2 = new Pod::ParseTree; + my $ptree4 = Pod::ParseTree->new($array_ref); + my $ptree3 = new Pod::ParseTree($array_ref); + +This is a class method that constructs a C<Pod::Parse_tree> object and +returns a reference to the new parse-tree. If a single-argument is given, +it must be a reference to an array, and is used to initialize the root +(top) of the parse tree. + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + + my $self = (@_ == 1 and ref $_[0]) ? $_[0] : []; + + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + return $self; +} + +##--------------------------------------------------------------------------- + +=head2 $ptree-E<gt>B<top()> + + my $top_node = $ptree->top(); + $ptree->top( $top_node ); + $ptree->top( @children ); + +This method gets/sets the top node of the parse-tree. If no arguments are +given, it returns the topmost node in the tree (the root), which is also +a B<Pod::ParseTree>. If it is given a single argument that is a reference, +then the reference is assumed to a parse-tree and becomes the new top node. +Otherwise, if arguments are given, they are treated as the new list of +children for the top node. + +=cut + +sub top { + my $self = shift; + if (@_ > 0) { + @{ $self } = (@_ == 1 and ref $_[0]) ? ${ @_ } : @_; + } + return $self; +} + +## let parse_tree() & ptree() be aliases for the 'top' method +*parse_tree = *ptree = \⊤ + +##--------------------------------------------------------------------------- + +=head2 $ptree-E<gt>B<children()> + +This method gets/sets the children of the top node in the parse-tree. +If no arguments are given, it returns the list (array) of children +(each of which should be either a string or a B<Pod::InteriorSequence>. +Otherwise, if arguments are given, they are treated as the new list of +children for the top node. + +=cut + +sub children { + my $self = shift; + if (@_ > 0) { + @{ $self } = (@_ == 1 and ref $_[0]) ? ${ @_ } : @_; + } + return @{ $self }; +} + +##--------------------------------------------------------------------------- + +=head2 $ptree-E<gt>B<prepend()> + +This method prepends the given text or parse-tree to the current parse-tree. +If the first item on the parse-tree is text and the argument is also text, +then the text is prepended to the first item (not added as a separate string). +Otherwise the argument is added as a new string or parse-tree I<before> +the current one. + +=cut + +use vars qw(@ptree); ## an alias used for performance reasons + +sub prepend { + my $self = shift; + local *ptree = $self; + for (@_) { + next unless length; + if (@ptree && !(ref $ptree[0]) && !(ref $_)) { + $ptree[0] = $_ . $ptree[0]; + } + else { + unshift @ptree, $_; + } + } +} + +##--------------------------------------------------------------------------- + +=head2 $ptree-E<gt>B<append()> + +This method appends the given text or parse-tree to the current parse-tree. +If the last item on the parse-tree is text and the argument is also text, +then the text is appended to the last item (not added as a separate string). +Otherwise the argument is added as a new string or parse-tree I<after> +the current one. + +=cut + +sub append { + my $self = shift; + local *ptree = $self; + my $can_append = @ptree && !(ref $ptree[-1]); + for (@_) { + if (ref) { + push @ptree, $_; + } + elsif(!length) { + next; + } + elsif ($can_append) { + $ptree[-1] .= $_; + } + else { + push @ptree, $_; + } + } +} + +=head2 $ptree-E<gt>B<raw_text()> + + my $ptree_raw_text = $ptree->raw_text(); + +This method will return the I<raw> text of the POD parse-tree +exactly as it appeared in the input. + +=cut + +sub raw_text { + my $self = shift; + my $text = ''; + for ( @$self ) { + $text .= (ref $_) ? $_->raw_text : $_; + } + return $text; +} + +##--------------------------------------------------------------------------- + +## Private routines to set/unset child->parent links + +sub _unset_child2parent_links { + my $self = shift; + local *ptree = $self; + for (@ptree) { + next unless (defined and length and ref and ref ne 'SCALAR'); + $_->_unset_child2parent_links() + if UNIVERSAL::isa($_, 'Pod::InteriorSequence'); + } +} + +sub _set_child2parent_links { + ## nothing to do, Pod::ParseTrees cant have parent pointers +} + +=head2 Pod::ParseTree::B<DESTROY()> + +This method performs any necessary cleanup for the parse-tree. +If you override this method then it is B<imperative> +that you invoke the parent method from within your own method, +otherwise I<parse-tree storage will not be reclaimed upon destruction!> + +=cut + +sub DESTROY { + ## We need to get rid of all child->parent pointers throughout the + ## tree so their reference counts will go to zero and they can be + ## garbage-collected + _unset_child2parent_links(@_); +} + +############################################################################# + +=head1 SEE ALSO + +See L<Pod::Parser>, L<Pod::Select> + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +=cut + +1; diff --git a/ext/Pod-Parser/lib/Pod/ParseUtils.pm b/ext/Pod-Parser/lib/Pod/ParseUtils.pm new file mode 100644 index 0000000000..f3c7ef4677 --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/ParseUtils.pm @@ -0,0 +1,855 @@ +############################################################################# +# Pod/ParseUtils.pm -- helpers for POD parsing and conversion +# +# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::ParseUtils; +use strict; + +use vars qw($VERSION); +$VERSION = '1.36'; ## Current version of this package +require 5.005; ## requires this Perl version or later + +=head1 NAME + +Pod::ParseUtils - helpers for POD parsing and conversion + +=head1 SYNOPSIS + + use Pod::ParseUtils; + + my $list = new Pod::List; + my $link = Pod::Hyperlink->new('Pod::Parser'); + +=head1 DESCRIPTION + +B<Pod::ParseUtils> contains a few object-oriented helper packages for +POD parsing and processing (i.e. in POD formatters and translators). + +=cut + +#----------------------------------------------------------------------------- +# Pod::List +# +# class to hold POD list info (=over, =item, =back) +#----------------------------------------------------------------------------- + +package Pod::List; + +use Carp; + +=head2 Pod::List + +B<Pod::List> can be used to hold information about POD lists +(written as =over ... =item ... =back) for further processing. +The following methods are available: + +=over 4 + +=item Pod::List-E<gt>new() + +Create a new list object. Properties may be specified through a hash +reference like this: + + my $list = Pod::List->new({ -start => $., -indent => 4 }); + +See the individual methods/properties for details. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + $self->initialize(); + return $self; +} + +sub initialize { + my $self = shift; + $self->{-file} ||= 'unknown'; + $self->{-start} ||= 'unknown'; + $self->{-indent} ||= 4; # perlpod: "should be the default" + $self->{_items} = []; + $self->{-type} ||= ''; +} + +=item $list-E<gt>file() + +Without argument, retrieves the file name the list is in. This must +have been set before by either specifying B<-file> in the B<new()> +method or by calling the B<file()> method with a scalar argument. + +=cut + +# The POD file name the list appears in +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item $list-E<gt>start() + +Without argument, retrieves the line number where the list started. +This must have been set before by either specifying B<-start> in the +B<new()> method or by calling the B<start()> method with a scalar +argument. + +=cut + +# The line in the file the node appears +sub start { + return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start}; +} + +=item $list-E<gt>indent() + +Without argument, retrieves the indent level of the list as specified +in C<=over n>. This must have been set before by either specifying +B<-indent> in the B<new()> method or by calling the B<indent()> method +with a scalar argument. + +=cut + +# indent level +sub indent { + return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent}; +} + +=item $list-E<gt>type() + +Without argument, retrieves the list type, which can be an arbitrary value, +e.g. C<OL>, C<UL>, ... when thinking the HTML way. +This must have been set before by either specifying +B<-type> in the B<new()> method or by calling the B<type()> method +with a scalar argument. + +=cut + +# The type of the list (UL, OL, ...) +sub type { + return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; +} + +=item $list-E<gt>rx() + +Without argument, retrieves a regular expression for simplifying the +individual item strings once the list type has been determined. Usage: +E.g. when converting to HTML, one might strip the leading number in +an ordered list as C<E<lt>OLE<gt>> already prints numbers itself. +This must have been set before by either specifying +B<-rx> in the B<new()> method or by calling the B<rx()> method +with a scalar argument. + +=cut + +# The regular expression to simplify the items +sub rx { + return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx}; +} + +=item $list-E<gt>item() + +Without argument, retrieves the array of the items in this list. +The items may be represented by any scalar. +If an argument has been given, it is pushed on the list of items. + +=cut + +# The individual =items of this list +sub item { + my ($self,$item) = @_; + if(defined $item) { + push(@{$self->{_items}}, $item); + return $item; + } + else { + return @{$self->{_items}}; + } +} + +=item $list-E<gt>parent() + +Without argument, retrieves information about the parent holding this +list, which is represented as an arbitrary scalar. +This must have been set before by either specifying +B<-parent> in the B<new()> method or by calling the B<parent()> method +with a scalar argument. + +=cut + +# possibility for parsers/translators to store information about the +# lists's parent object +sub parent { + return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent}; +} + +=item $list-E<gt>tag() + +Without argument, retrieves information about the list tag, which can be +any scalar. +This must have been set before by either specifying +B<-tag> in the B<new()> method or by calling the B<tag()> method +with a scalar argument. + +=back + +=cut + +# possibility for parsers/translators to store information about the +# list's object +sub tag { + return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag}; +} + +#----------------------------------------------------------------------------- +# Pod::Hyperlink +# +# class to manipulate POD hyperlinks (L<>) +#----------------------------------------------------------------------------- + +package Pod::Hyperlink; + +=head2 Pod::Hyperlink + +B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage: + + my $link = Pod::Hyperlink->new('alternative text|page/"section in page"'); + +The B<Pod::Hyperlink> class is mainly designed to parse the contents of the +C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the +different parts of a POD hyperlink for further processing. It can also be +used to construct hyperlinks. + +=over 4 + +=item Pod::Hyperlink-E<gt>new() + +The B<new()> method can either be passed a set of key/value pairs or a single +scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object +of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a +failure, the error message is stored in C<$@>. + +=cut + +use Carp; + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my $self = +{}; + bless $self, $class; + $self->initialize(); + if(defined $_[0]) { + if(ref($_[0])) { + # called with a list of parameters + %$self = %{$_[0]}; + $self->_construct_text(); + } + else { + # called with L<> contents + return unless($self->parse($_[0])); + } + } + return $self; +} + +sub initialize { + my $self = shift; + $self->{-line} ||= 'undef'; + $self->{-file} ||= 'undef'; + $self->{-page} ||= ''; + $self->{-node} ||= ''; + $self->{-alttext} ||= ''; + $self->{-type} ||= 'undef'; + $self->{_warnings} = []; +} + +=item $link-E<gt>parse($string) + +This method can be used to (re)parse a (new) hyperlink, i.e. the contents +of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object. +Warnings are stored in the B<warnings> property. +E.g. sections like C<LE<lt>open(2)E<gt>> are deprecated, as they do not point +to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage +section can simply be dropped. + +=cut + +sub parse { + my $self = shift; + local($_) = $_[0]; + # syntax check the link and extract destination + my ($alttext,$page,$node,$type,$quoted) = (undef,'','','',0); + + $self->{_warnings} = []; + + # collapse newlines with whitespace + s/\s*\n+\s*/ /g; + + # strip leading/trailing whitespace + if(s/^[\s\n]+//) { + $self->warning('ignoring leading whitespace in link'); + } + if(s/[\s\n]+$//) { + $self->warning('ignoring trailing whitespace in link'); + } + unless(length($_)) { + _invalid_link('empty link'); + return; + } + + ## Check for different possibilities. This is tedious and error-prone + # we match all possibilities (alttext, page, section/item) + #warn "DEBUG: link=$_\n"; + + # only page + # problem: a lot of people use (), or (1) or the like to indicate + # man page sections. But this collides with L<func()> that is supposed + # to point to an internal funtion... + my $page_rx = '[\w.-]+(?:::[\w.-]+)*(?:[(](?:\d\w*|)[)]|)'; + # page name only + if(/^($page_rx)$/o) { + $page = $1; + $type = 'page'; + } + # alttext, page and "section" + elsif(m{^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$}o) { + ($alttext, $page, $node) = ($1, $2, $3); + $type = 'section'; + $quoted = 1; #... therefore | and / are allowed + } + # alttext and page + elsif(/^(.*?)\s*[|]\s*($page_rx)$/o) { + ($alttext, $page) = ($1, $2); + $type = 'page'; + } + # alttext and "section" + elsif(m{^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$}) { + ($alttext, $node) = ($1,$2); + $type = 'section'; + $quoted = 1; + } + # page and "section" + elsif(m{^($page_rx)\s*/\s*"(.+)"$}o) { + ($page, $node) = ($1, $2); + $type = 'section'; + $quoted = 1; + } + # page and item + elsif(m{^($page_rx)\s*/\s*(.+)$}o) { + ($page, $node) = ($1, $2); + $type = 'item'; + } + # only "section" + elsif(m{^/?"(.+)"$}) { + $node = $1; + $type = 'section'; + $quoted = 1; + } + # only item + elsif(m{^\s*/(.+)$}) { + $node = $1; + $type = 'item'; + } + + # non-standard: Hyperlink with alt-text - doesn't remove protocol prefix, maybe it should? + elsif(/^ \s* (.*?) \s* [|] \s* (\w+:[^:\s] [^\s|]*?) \s* $/ix) { + ($alttext,$node) = ($1,$2); + $type = 'hyperlink'; + } + + # non-standard: Hyperlink + elsif(/^(\w+:[^:\s]\S*)$/i) { + $node = $1; + $type = 'hyperlink'; + } + # alttext, page and item + elsif(m{^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$}o) { + ($alttext, $page, $node) = ($1, $2, $3); + $type = 'item'; + } + # alttext and item + elsif(m{^(.*?)\s*[|]\s*/(.+)$}) { + ($alttext, $node) = ($1,$2); + } + # must be an item or a "malformed" section (without "") + else { + $node = $_; + $type = 'item'; + } + # collapse whitespace in nodes + $node =~ s/\s+/ /gs; + + # empty alternative text expands to node name + if(defined $alttext) { + if(!length($alttext)) { + $alttext = $node || $page; + } + } + else { + $alttext = ''; + } + + if($page =~ /[(]\w*[)]$/) { + $self->warning("(section) in '$page' deprecated"); + } + if(!$quoted && $node =~ m{[|/]} && $type ne 'hyperlink') { + $self->warning("node '$node' contains non-escaped | or /"); + } + if($alttext =~ m{[|/]}) { + $self->warning("alternative text '$node' contains non-escaped | or /"); + } + $self->{-page} = $page; + $self->{-node} = $node; + $self->{-alttext} = $alttext; + #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n"; + $self->{-type} = $type; + $self->_construct_text(); + 1; +} + +sub _construct_text { + my $self = shift; + my $alttext = $self->alttext(); + my $type = $self->type(); + my $section = $self->node(); + my $page = $self->page(); + my $page_ext = ''; + $page =~ s/([(]\w*[)])$// && ($page_ext = $1); + if($alttext) { + $self->{_text} = $alttext; + } + elsif($type eq 'hyperlink') { + $self->{_text} = $section; + } + else { + $self->{_text} = ($section || '') . + (($page && $section) ? ' in ' : '') . + "$page$page_ext"; + } + # for being marked up later + # use the non-standard markers P<> and Q<>, so that the resulting + # text can be parsed by the translators. It's their job to put + # the correct hypertext around the linktext + if($alttext) { + $self->{_markup} = "Q<$alttext>"; + } + elsif($type eq 'hyperlink') { + $self->{_markup} = "Q<$section>"; + } + else { + $self->{_markup} = (!$section ? '' : "Q<$section>") . + ($page ? ($section ? ' in ':'') . "P<$page>$page_ext" : ''); + } +} + +=item $link-E<gt>markup($string) + +Set/retrieve the textual value of the link. This string contains special +markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the +translator's interior sequence expansion engine to the +formatter-specific code to highlight/activate the hyperlink. The details +have to be implemented in the translator. + +=cut + +#' retrieve/set markuped text +sub markup { + return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup}; +} + +=item $link-E<gt>text() + +This method returns the textual representation of the hyperlink as above, +but without markers (read only). Depending on the link type this is one of +the following alternatives (the + and * denote the portions of the text +that are marked up): + + +perl+ L<perl> + *$|* in +perlvar+ L<perlvar/$|> + *OPTIONS* in +perldoc+ L<perldoc/"OPTIONS"> + *DESCRIPTION* L<"DESCRIPTION"> + +=cut + +# The complete link's text +sub text { + return $_[0]->{_text}; +} + +=item $link-E<gt>warning() + +After parsing, this method returns any warnings encountered during the +parsing process. + +=cut + +# Set/retrieve warnings +sub warning { + my $self = shift; + if(@_) { + push(@{$self->{_warnings}}, @_); + return @_; + } + return @{$self->{_warnings}}; +} + +=item $link-E<gt>file() + +=item $link-E<gt>line() + +Just simple slots for storing information about the line and the file +the link was encountered in. Has to be filled in manually. + +=cut + +# The line in the file the link appears +sub line { + return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line}; +} + +# The POD file name the link appears in +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item $link-E<gt>page() + +This method sets or returns the POD page this link points to. + +=cut + +# The POD page the link appears on +sub page { + if (@_ > 1) { + $_[0]->{-page} = $_[1]; + $_[0]->_construct_text(); + } + return $_[0]->{-page}; +} + +=item $link-E<gt>node() + +As above, but the destination node text of the link. + +=cut + +# The link destination +sub node { + if (@_ > 1) { + $_[0]->{-node} = $_[1]; + $_[0]->_construct_text(); + } + return $_[0]->{-node}; +} + +=item $link-E<gt>alttext() + +Sets or returns an alternative text specified in the link. + +=cut + +# Potential alternative text +sub alttext { + if (@_ > 1) { + $_[0]->{-alttext} = $_[1]; + $_[0]->_construct_text(); + } + return $_[0]->{-alttext}; +} + +=item $link-E<gt>type() + +The node type, either C<section> or C<item>. As an unofficial type, +there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>> + +=cut + +# The type: item or headn +sub type { + return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; +} + +=item $link-E<gt>link() + +Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>. + +=back + +=cut + +# The link itself +sub link { + my $self = shift; + my $link = $self->page() || ''; + if($self->node()) { + my $node = $self->node(); + $node =~ s/\|/E<verbar>/g; + $node =~ s{/}{E<sol>}g; + if($self->type() eq 'section') { + $link .= ($link ? '/' : '') . '"' . $node . '"'; + } + elsif($self->type() eq 'hyperlink') { + $link = $self->node(); + } + else { # item + $link .= '/' . $node; + } + } + if($self->alttext()) { + my $text = $self->alttext(); + $text =~ s/\|/E<verbar>/g; + $text =~ s{/}{E<sol>}g; + $link = "$text|$link"; + } + return $link; +} + +sub _invalid_link { + my ($msg) = @_; + # this sets @_ + #eval { die "$msg\n" }; + #chomp $@; + $@ = $msg; # this seems to work, too! + return; +} + +#----------------------------------------------------------------------------- +# Pod::Cache +# +# class to hold POD page details +#----------------------------------------------------------------------------- + +package Pod::Cache; + +=head2 Pod::Cache + +B<Pod::Cache> holds information about a set of POD documents, +especially the nodes for hyperlinks. +The following methods are available: + +=over 4 + +=item Pod::Cache-E<gt>new() + +Create a new cache object. This object can hold an arbitrary number of +POD documents of class Pod::Cache::Item. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my $self = []; + bless $self, $class; + return $self; +} + +=item $cache-E<gt>item() + +Add a new item to the cache. Without arguments, this method returns a +list of all cache elements. + +=cut + +sub item { + my ($self,%param) = @_; + if(%param) { + my $item = Pod::Cache::Item->new(%param); + push(@$self, $item); + return $item; + } + else { + return @{$self}; + } +} + +=item $cache-E<gt>find_page($name) + +Look for a POD document named C<$name> in the cache. Returns the +reference to the corresponding Pod::Cache::Item object or undef if +not found. + +=back + +=cut + +sub find_page { + my ($self,$page) = @_; + foreach(@$self) { + if($_->page() eq $page) { + return $_; + } + } + return; +} + +package Pod::Cache::Item; + +=head2 Pod::Cache::Item + +B<Pod::Cache::Item> holds information about individual POD documents, +that can be grouped in a Pod::Cache object. +It is intended to hold information about the hyperlink nodes of POD +documents. +The following methods are available: + +=over 4 + +=item Pod::Cache::Item-E<gt>new() + +Create a new object. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + $self->initialize(); + return $self; +} + +sub initialize { + my $self = shift; + $self->{-nodes} = [] unless(defined $self->{-nodes}); +} + +=item $cacheitem-E<gt>page() + +Set/retrieve the POD document name (e.g. "Pod::Parser"). + +=cut + +# The POD page +sub page { + return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page}; +} + +=item $cacheitem-E<gt>description() + +Set/retrieve the POD short description as found in the C<=head1 NAME> +section. + +=cut + +# The POD description, taken out of NAME if present +sub description { + return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description}; +} + +=item $cacheitem-E<gt>path() + +Set/retrieve the POD file storage path. + +=cut + +# The file path +sub path { + return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path}; +} + +=item $cacheitem-E<gt>file() + +Set/retrieve the POD file name. + +=cut + +# The POD file name +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item $cacheitem-E<gt>nodes() + +Add a node (or a list of nodes) to the document's node list. Note that +the order is kept, i.e. start with the first node and end with the last. +If no argument is given, the current list of nodes is returned in the +same order the nodes have been added. +A node can be any scalar, but usually is a pair of node string and +unique id for the C<find_node> method to work correctly. + +=cut + +# The POD nodes +sub nodes { + my ($self,@nodes) = @_; + if(@nodes) { + push(@{$self->{-nodes}}, @nodes); + return @nodes; + } + else { + return @{$self->{-nodes}}; + } +} + +=item $cacheitem-E<gt>find_node($name) + +Look for a node or index entry named C<$name> in the object. +Returns the unique id of the node (i.e. the second element of the array +stored in the node array) or undef if not found. + +=cut + +sub find_node { + my ($self,$node) = @_; + my @search; + push(@search, @{$self->{-nodes}}) if($self->{-nodes}); + push(@search, @{$self->{-idx}}) if($self->{-idx}); + foreach(@search) { + if($_->[0] eq $node) { + return $_->[1]; # id + } + } + return; +} + +=item $cacheitem-E<gt>idx() + +Add an index entry (or a list of them) to the document's index list. Note that +the order is kept, i.e. start with the first node and end with the last. +If no argument is given, the current list of index entries is returned in the +same order the entries have been added. +An index entry can be any scalar, but usually is a pair of string and +unique id. + +=back + +=cut + +# The POD index entries +sub idx { + my ($self,@idx) = @_; + if(@idx) { + push(@{$self->{-idx}}, @idx); + return @idx; + } + else { + return @{$self->{-idx}}; + } +} + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Marek Rouchal E<lt>marekr@cpan.orgE<gt>, borrowing +a lot of things from L<pod2man> and L<pod2roff> as well as other POD +processing tools by Tom Christiansen, Brad Appleton and Russ Allbery. + +=head1 SEE ALSO + +L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>, +L<pod2html> + +=cut + +1; diff --git a/ext/Pod-Parser/lib/Pod/Parser.pm b/ext/Pod-Parser/lib/Pod/Parser.pm new file mode 100644 index 0000000000..c807f3fbad --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/Parser.pm @@ -0,0 +1,1829 @@ +############################################################################# +# Pod/Parser.pm -- package which defines a base class for parsing POD docs. +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Parser; +use strict; + +## These "variables" are used as local "glob aliases" for performance +use vars qw($VERSION @ISA %myData %myOpts @input_stack); +$VERSION = '1.37'; ## Current version of this package +require 5.005; ## requires this Perl version or later + +############################################################################# + +=head1 NAME + +Pod::Parser - base class for creating POD filters and translators + +=head1 SYNOPSIS + + use Pod::Parser; + + package MyParser; + @ISA = qw(Pod::Parser); + + sub command { + my ($parser, $command, $paragraph, $line_num) = @_; + ## Interpret the command and its text; sample actions might be: + if ($command eq 'head1') { ... } + elsif ($command eq 'head2') { ... } + ## ... other commands and their actions + my $out_fh = $parser->output_handle(); + my $expansion = $parser->interpolate($paragraph, $line_num); + print $out_fh $expansion; + } + + sub verbatim { + my ($parser, $paragraph, $line_num) = @_; + ## Format verbatim paragraph; sample actions might be: + my $out_fh = $parser->output_handle(); + print $out_fh $paragraph; + } + + sub textblock { + my ($parser, $paragraph, $line_num) = @_; + ## Translate/Format this block of text; sample actions might be: + my $out_fh = $parser->output_handle(); + my $expansion = $parser->interpolate($paragraph, $line_num); + print $out_fh $expansion; + } + + sub interior_sequence { + my ($parser, $seq_command, $seq_argument) = @_; + ## Expand an interior sequence; sample actions might be: + return "*$seq_argument*" if ($seq_command eq 'B'); + return "`$seq_argument'" if ($seq_command eq 'C'); + return "_${seq_argument}_'" if ($seq_command eq 'I'); + ## ... other sequence commands and their resulting text + } + + package main; + + ## Create a parser object and have it parse file whose name was + ## given on the command-line (use STDIN if no files were given). + $parser = new MyParser(); + $parser->parse_from_filehandle(\*STDIN) if (@ARGV == 0); + for (@ARGV) { $parser->parse_from_file($_); } + +=head1 REQUIRES + +perl5.005, Pod::InputObjects, Exporter, Symbol, Carp + +=head1 EXPORTS + +Nothing. + +=head1 DESCRIPTION + +B<Pod::Parser> is a base class for creating POD filters and translators. +It handles most of the effort involved with parsing the POD sections +from an input stream, leaving subclasses free to be concerned only with +performing the actual translation of text. + +B<Pod::Parser> parses PODs, and makes method calls to handle the various +components of the POD. Subclasses of B<Pod::Parser> override these methods +to translate the POD into whatever output format they desire. + +=head1 QUICK OVERVIEW + +To create a POD filter for translating POD documentation into some other +format, you create a subclass of B<Pod::Parser> which typically overrides +just the base class implementation for the following methods: + +=over 2 + +=item * + +B<command()> + +=item * + +B<verbatim()> + +=item * + +B<textblock()> + +=item * + +B<interior_sequence()> + +=back + +You may also want to override the B<begin_input()> and B<end_input()> +methods for your subclass (to perform any needed per-file and/or +per-document initialization or cleanup). + +If you need to perform any preprocessing of input before it is parsed +you may want to override one or more of B<preprocess_line()> and/or +B<preprocess_paragraph()>. + +Sometimes it may be necessary to make more than one pass over the input +files. If this is the case you have several options. You can make the +first pass using B<Pod::Parser> and override your methods to store the +intermediate results in memory somewhere for the B<end_pod()> method to +process. You could use B<Pod::Parser> for several passes with an +appropriate state variable to control the operation for each pass. If +your input source can't be reset to start at the beginning, you can +store it in some other structure as a string or an array and have that +structure implement a B<getline()> method (which is all that +B<parse_from_filehandle()> uses to read input). + +Feel free to add any member data fields you need to keep track of things +like current font, indentation, horizontal or vertical position, or +whatever else you like. Be sure to read L<"PRIVATE METHODS AND DATA"> +to avoid name collisions. + +For the most part, the B<Pod::Parser> base class should be able to +do most of the input parsing for you and leave you free to worry about +how to interpret the commands and translate the result. + +Note that all we have described here in this quick overview is the +simplest most straightforward use of B<Pod::Parser> to do stream-based +parsing. It is also possible to use the B<Pod::Parser::parse_text> function +to do more sophisticated tree-based parsing. See L<"TREE-BASED PARSING">. + +=head1 PARSING OPTIONS + +A I<parse-option> is simply a named option of B<Pod::Parser> with a +value that corresponds to a certain specified behavior. These various +behaviors of B<Pod::Parser> may be enabled/disabled by setting +or unsetting one or more I<parse-options> using the B<parseopts()> method. +The set of currently accepted parse-options is as follows: + +=over 3 + +=item B<-want_nonPODs> (default: unset) + +Normally (by default) B<Pod::Parser> will only provide access to +the POD sections of the input. Input paragraphs that are not part +of the POD-format documentation are not made available to the caller +(not even using B<preprocess_paragraph()>). Setting this option to a +non-empty, non-zero value will allow B<preprocess_paragraph()> to see +non-POD sections of the input as well as POD sections. The B<cutting()> +method can be used to determine if the corresponding paragraph is a POD +paragraph, or some other input paragraph. + +=item B<-process_cut_cmd> (default: unset) + +Normally (by default) B<Pod::Parser> handles the C<=cut> POD directive +by itself and does not pass it on to the caller for processing. Setting +this option to a non-empty, non-zero value will cause B<Pod::Parser> to +pass the C<=cut> directive to the caller just like any other POD command +(and hence it may be processed by the B<command()> method). + +B<Pod::Parser> will still interpret the C<=cut> directive to mean that +"cutting mode" has been (re)entered, but the caller will get a chance +to capture the actual C<=cut> paragraph itself for whatever purpose +it desires. + +=item B<-warnings> (default: unset) + +Normally (by default) B<Pod::Parser> recognizes a bare minimum of +pod syntax errors and warnings and issues diagnostic messages +for errors, but not for warnings. (Use B<Pod::Checker> to do more +thorough checking of POD syntax.) Setting this option to a non-empty, +non-zero value will cause B<Pod::Parser> to issue diagnostics for +the few warnings it recognizes as well as the errors. + +=back + +Please see L<"parseopts()"> for a complete description of the interface +for the setting and unsetting of parse-options. + +=cut + +############################################################################# + +#use diagnostics; +use Pod::InputObjects; +use Carp; +use Exporter; +BEGIN { + if ($] < 5.006) { + require Symbol; + import Symbol; + } +} +@ISA = qw(Exporter); + +############################################################################# + +=head1 RECOMMENDED SUBROUTINE/METHOD OVERRIDES + +B<Pod::Parser> provides several methods which most subclasses will probably +want to override. These methods are as follows: + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<command()> + + $parser->command($cmd,$text,$line_num,$pod_para); + +This method should be overridden by subclasses to take the appropriate +action when a POD command paragraph (denoted by a line beginning with +"=") is encountered. When such a POD directive is seen in the input, +this method is called and is passed: + +=over 3 + +=item C<$cmd> + +the name of the command for this POD paragraph + +=item C<$text> + +the paragraph text for the given POD paragraph command. + +=item C<$line_num> + +the line-number of the beginning of the paragraph + +=item C<$pod_para> + +a reference to a C<Pod::Paragraph> object which contains further +information about the paragraph command (see L<Pod::InputObjects> +for details). + +=back + +B<Note> that this method I<is> called for C<=pod> paragraphs. + +The base class implementation of this method simply treats the raw POD +command as normal block of paragraph text (invoking the B<textblock()> +method with the command paragraph). + +=cut + +sub command { + my ($self, $cmd, $text, $line_num, $pod_para) = @_; + ## Just treat this like a textblock + $self->textblock($pod_para->raw_text(), $line_num, $pod_para); +} + +##--------------------------------------------------------------------------- + +=head1 B<verbatim()> + + $parser->verbatim($text,$line_num,$pod_para); + +This method may be overridden by subclasses to take the appropriate +action when a block of verbatim text is encountered. It is passed the +following parameters: + +=over 3 + +=item C<$text> + +the block of text for the verbatim paragraph + +=item C<$line_num> + +the line-number of the beginning of the paragraph + +=item C<$pod_para> + +a reference to a C<Pod::Paragraph> object which contains further +information about the paragraph (see L<Pod::InputObjects> +for details). + +=back + +The base class implementation of this method simply prints the textblock +(unmodified) to the output filehandle. + +=cut + +sub verbatim { + my ($self, $text, $line_num, $pod_para) = @_; + my $out_fh = $self->{_OUTPUT}; + print $out_fh $text; +} + +##--------------------------------------------------------------------------- + +=head1 B<textblock()> + + $parser->textblock($text,$line_num,$pod_para); + +This method may be overridden by subclasses to take the appropriate +action when a normal block of POD text is encountered (although the base +class method will usually do what you want). It is passed the following +parameters: + +=over 3 + +=item C<$text> + +the block of text for the a POD paragraph + +=item C<$line_num> + +the line-number of the beginning of the paragraph + +=item C<$pod_para> + +a reference to a C<Pod::Paragraph> object which contains further +information about the paragraph (see L<Pod::InputObjects> +for details). + +=back + +In order to process interior sequences, subclasses implementations of +this method will probably want to invoke either B<interpolate()> or +B<parse_text()>, passing it the text block C<$text>, and the corresponding +line number in C<$line_num>, and then perform any desired processing upon +the returned result. + +The base class implementation of this method simply prints the text block +as it occurred in the input stream). + +=cut + +sub textblock { + my ($self, $text, $line_num, $pod_para) = @_; + my $out_fh = $self->{_OUTPUT}; + print $out_fh $self->interpolate($text, $line_num); +} + +##--------------------------------------------------------------------------- + +=head1 B<interior_sequence()> + + $parser->interior_sequence($seq_cmd,$seq_arg,$pod_seq); + +This method should be overridden by subclasses to take the appropriate +action when an interior sequence is encountered. An interior sequence is +an embedded command within a block of text which appears as a command +name (usually a single uppercase character) followed immediately by a +string of text which is enclosed in angle brackets. This method is +passed the sequence command C<$seq_cmd> and the corresponding text +C<$seq_arg>. It is invoked by the B<interpolate()> method for each interior +sequence that occurs in the string that it is passed. It should return +the desired text string to be used in place of the interior sequence. +The C<$pod_seq> argument is a reference to a C<Pod::InteriorSequence> +object which contains further information about the interior sequence. +Please see L<Pod::InputObjects> for details if you need to access this +additional information. + +Subclass implementations of this method may wish to invoke the +B<nested()> method of C<$pod_seq> to see if it is nested inside +some other interior-sequence (and if so, which kind). + +The base class implementation of the B<interior_sequence()> method +simply returns the raw text of the interior sequence (as it occurred +in the input) to the caller. + +=cut + +sub interior_sequence { + my ($self, $seq_cmd, $seq_arg, $pod_seq) = @_; + ## Just return the raw text of the interior sequence + return $pod_seq->raw_text(); +} + +############################################################################# + +=head1 OPTIONAL SUBROUTINE/METHOD OVERRIDES + +B<Pod::Parser> provides several methods which subclasses may want to override +to perform any special pre/post-processing. These methods do I<not> have to +be overridden, but it may be useful for subclasses to take advantage of them. + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<new()> + + my $parser = Pod::Parser->new(); + +This is the constructor for B<Pod::Parser> and its subclasses. You +I<do not> need to override this method! It is capable of constructing +subclass objects as well as base class objects, provided you use +any of the following constructor invocation styles: + + my $parser1 = MyParser->new(); + my $parser2 = new MyParser(); + my $parser3 = $parser2->new(); + +where C<MyParser> is some subclass of B<Pod::Parser>. + +Using the syntax C<MyParser::new()> to invoke the constructor is I<not> +recommended, but if you insist on being able to do this, then the +subclass I<will> need to override the B<new()> constructor method. If +you do override the constructor, you I<must> be sure to invoke the +B<initialize()> method of the newly blessed object. + +Using any of the above invocations, the first argument to the +constructor is always the corresponding package name (or object +reference). No other arguments are required, but if desired, an +associative array (or hash-table) my be passed to the B<new()> +constructor, as in: + + my $parser1 = MyParser->new( MYDATA => $value1, MOREDATA => $value2 ); + my $parser2 = new MyParser( -myflag => 1 ); + +All arguments passed to the B<new()> constructor will be treated as +key/value pairs in a hash-table. The newly constructed object will be +initialized by copying the contents of the given hash-table (which may +have been empty). The B<new()> constructor for this class and all of its +subclasses returns a blessed reference to the initialized object (hash-table). + +=cut + +sub new { + ## Determine if we were called via an object-ref or a classname + my ($this,%params) = @_; + my $class = ref($this) || $this; + ## Any remaining arguments are treated as initial values for the + ## hash that is used to represent this object. + my $self = { %params }; + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + $self->initialize(); + return $self; +} + +##--------------------------------------------------------------------------- + +=head1 B<initialize()> + + $parser->initialize(); + +This method performs any necessary object initialization. It takes no +arguments (other than the object instance of course, which is typically +copied to a local variable named C<$self>). If subclasses override this +method then they I<must> be sure to invoke C<$self-E<gt>SUPER::initialize()>. + +=cut + +sub initialize { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<begin_pod()> + + $parser->begin_pod(); + +This method is invoked at the beginning of processing for each POD +document that is encountered in the input. Subclasses should override +this method to perform any per-document initialization. + +=cut + +sub begin_pod { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<begin_input()> + + $parser->begin_input(); + +This method is invoked by B<parse_from_filehandle()> immediately I<before> +processing input from a filehandle. The base class implementation does +nothing, however, subclasses may override it to perform any per-file +initializations. + +Note that if multiple files are parsed for a single POD document +(perhaps the result of some future C<=include> directive) this method +is invoked for every file that is parsed. If you wish to perform certain +initializations once per document, then you should use B<begin_pod()>. + +=cut + +sub begin_input { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<end_input()> + + $parser->end_input(); + +This method is invoked by B<parse_from_filehandle()> immediately I<after> +processing input from a filehandle. The base class implementation does +nothing, however, subclasses may override it to perform any per-file +cleanup actions. + +Please note that if multiple files are parsed for a single POD document +(perhaps the result of some kind of C<=include> directive) this method +is invoked for every file that is parsed. If you wish to perform certain +cleanup actions once per document, then you should use B<end_pod()>. + +=cut + +sub end_input { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<end_pod()> + + $parser->end_pod(); + +This method is invoked at the end of processing for each POD document +that is encountered in the input. Subclasses should override this method +to perform any per-document finalization. + +=cut + +sub end_pod { + #my $self = shift; + #return; +} + +##--------------------------------------------------------------------------- + +=head1 B<preprocess_line()> + + $textline = $parser->preprocess_line($text, $line_num); + +This method should be overridden by subclasses that wish to perform +any kind of preprocessing for each I<line> of input (I<before> it has +been determined whether or not it is part of a POD paragraph). The +parameter C<$text> is the input line; and the parameter C<$line_num> is +the line number of the corresponding text line. + +The value returned should correspond to the new text to use in its +place. If the empty string or an undefined value is returned then no +further processing will be performed for this line. + +Please note that the B<preprocess_line()> method is invoked I<before> +the B<preprocess_paragraph()> method. After all (possibly preprocessed) +lines in a paragraph have been assembled together and it has been +determined that the paragraph is part of the POD documentation from one +of the selected sections, then B<preprocess_paragraph()> is invoked. + +The base class implementation of this method returns the given text. + +=cut + +sub preprocess_line { + my ($self, $text, $line_num) = @_; + return $text; +} + +##--------------------------------------------------------------------------- + +=head1 B<preprocess_paragraph()> + + $textblock = $parser->preprocess_paragraph($text, $line_num); + +This method should be overridden by subclasses that wish to perform any +kind of preprocessing for each block (paragraph) of POD documentation +that appears in the input stream. The parameter C<$text> is the POD +paragraph from the input file; and the parameter C<$line_num> is the +line number for the beginning of the corresponding paragraph. + +The value returned should correspond to the new text to use in its +place If the empty string is returned or an undefined value is +returned, then the given C<$text> is ignored (not processed). + +This method is invoked after gathering up all the lines in a paragraph +and after determining the cutting state of the paragraph, +but before trying to further parse or interpret them. After +B<preprocess_paragraph()> returns, the current cutting state (which +is returned by C<$self-E<gt>cutting()>) is examined. If it evaluates +to true then input text (including the given C<$text>) is cut (not +processed) until the next POD directive is encountered. + +Please note that the B<preprocess_line()> method is invoked I<before> +the B<preprocess_paragraph()> method. After all (possibly preprocessed) +lines in a paragraph have been assembled together and either it has been +determined that the paragraph is part of the POD documentation from one +of the selected sections or the C<-want_nonPODs> option is true, +then B<preprocess_paragraph()> is invoked. + +The base class implementation of this method returns the given text. + +=cut + +sub preprocess_paragraph { + my ($self, $text, $line_num) = @_; + return $text; +} + +############################################################################# + +=head1 METHODS FOR PARSING AND PROCESSING + +B<Pod::Parser> provides several methods to process input text. These +methods typically won't need to be overridden (and in some cases they +can't be overridden), but subclasses may want to invoke them to exploit +their functionality. + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<parse_text()> + + $ptree1 = $parser->parse_text($text, $line_num); + $ptree2 = $parser->parse_text({%opts}, $text, $line_num); + $ptree3 = $parser->parse_text(\%opts, $text, $line_num); + +This method is useful if you need to perform your own interpolation +of interior sequences and can't rely upon B<interpolate> to expand +them in simple bottom-up order. + +The parameter C<$text> is a string or block of text to be parsed +for interior sequences; and the parameter C<$line_num> is the +line number corresponding to the beginning of C<$text>. + +B<parse_text()> will parse the given text into a parse-tree of "nodes." +and interior-sequences. Each "node" in the parse tree is either a +text-string, or a B<Pod::InteriorSequence>. The result returned is a +parse-tree of type B<Pod::ParseTree>. Please see L<Pod::InputObjects> +for more information about B<Pod::InteriorSequence> and B<Pod::ParseTree>. + +If desired, an optional hash-ref may be specified as the first argument +to customize certain aspects of the parse-tree that is created and +returned. The set of recognized option keywords are: + +=over 3 + +=item B<-expand_seq> =E<gt> I<code-ref>|I<method-name> + +Normally, the parse-tree returned by B<parse_text()> will contain an +unexpanded C<Pod::InteriorSequence> object for each interior-sequence +encountered. Specifying B<-expand_seq> tells B<parse_text()> to "expand" +every interior-sequence it sees by invoking the referenced function +(or named method of the parser object) and using the return value as the +expanded result. + +If a subroutine reference was given, it is invoked as: + + &$code_ref( $parser, $sequence ) + +and if a method-name was given, it is invoked as: + + $parser->method_name( $sequence ) + +where C<$parser> is a reference to the parser object, and C<$sequence> +is a reference to the interior-sequence object. +[I<NOTE>: If the B<interior_sequence()> method is specified, then it is +invoked according to the interface specified in L<"interior_sequence()">]. + +=item B<-expand_text> =E<gt> I<code-ref>|I<method-name> + +Normally, the parse-tree returned by B<parse_text()> will contain a +text-string for each contiguous sequence of characters outside of an +interior-sequence. Specifying B<-expand_text> tells B<parse_text()> to +"preprocess" every such text-string it sees by invoking the referenced +function (or named method of the parser object) and using the return value +as the preprocessed (or "expanded") result. [Note that if the result is +an interior-sequence, then it will I<not> be expanded as specified by the +B<-expand_seq> option; Any such recursive expansion needs to be handled by +the specified callback routine.] + +If a subroutine reference was given, it is invoked as: + + &$code_ref( $parser, $text, $ptree_node ) + +and if a method-name was given, it is invoked as: + + $parser->method_name( $text, $ptree_node ) + +where C<$parser> is a reference to the parser object, C<$text> is the +text-string encountered, and C<$ptree_node> is a reference to the current +node in the parse-tree (usually an interior-sequence object or else the +top-level node of the parse-tree). + +=item B<-expand_ptree> =E<gt> I<code-ref>|I<method-name> + +Rather than returning a C<Pod::ParseTree>, pass the parse-tree as an +argument to the referenced subroutine (or named method of the parser +object) and return the result instead of the parse-tree object. + +If a subroutine reference was given, it is invoked as: + + &$code_ref( $parser, $ptree ) + +and if a method-name was given, it is invoked as: + + $parser->method_name( $ptree ) + +where C<$parser> is a reference to the parser object, and C<$ptree> +is a reference to the parse-tree object. + +=back + +=cut + +sub parse_text { + my $self = shift; + local $_ = ''; + + ## Get options and set any defaults + my %opts = (ref $_[0]) ? %{ shift() } : (); + my $expand_seq = $opts{'-expand_seq'} || undef; + my $expand_text = $opts{'-expand_text'} || undef; + my $expand_ptree = $opts{'-expand_ptree'} || undef; + + my $text = shift; + my $line = shift; + my $file = $self->input_file(); + my $cmd = ""; + + ## Convert method calls into closures, for our convenience + my $xseq_sub = $expand_seq; + my $xtext_sub = $expand_text; + my $xptree_sub = $expand_ptree; + if (defined $expand_seq and $expand_seq eq 'interior_sequence') { + ## If 'interior_sequence' is the method to use, we have to pass + ## more than just the sequence object, we also need to pass the + ## sequence name and text. + $xseq_sub = sub { + my ($sself, $iseq) = @_; + my $args = join('', $iseq->parse_tree->children); + return $sself->interior_sequence($iseq->name, $args, $iseq); + }; + } + ref $xseq_sub or $xseq_sub = sub { shift()->$expand_seq(@_) }; + ref $xtext_sub or $xtext_sub = sub { shift()->$expand_text(@_) }; + ref $xptree_sub or $xptree_sub = sub { shift()->$expand_ptree(@_) }; + + ## Keep track of the "current" interior sequence, and maintain a stack + ## of "in progress" sequences. + ## + ## NOTE that we push our own "accumulator" at the very beginning of the + ## stack. It's really a parse-tree, not a sequence; but it implements + ## the methods we need so we can use it to gather-up all the sequences + ## and strings we parse. Thus, by the end of our parsing, it should be + ## the only thing left on our stack and all we have to do is return it! + ## + my $seq = Pod::ParseTree->new(); + my @seq_stack = ($seq); + my ($ldelim, $rdelim) = ('', ''); + + ## Iterate over all sequence starts text (NOTE: split with + ## capturing parens keeps the delimiters) + $_ = $text; + my @tokens = split /([A-Z]<(?:<+\s)?)/; + while ( @tokens ) { + $_ = shift @tokens; + ## Look for the beginning of a sequence + if ( /^([A-Z])(<(?:<+\s)?)$/ ) { + ## Push a new sequence onto the stack of those "in-progress" + my $ldelim_orig; + ($cmd, $ldelim_orig) = ($1, $2); + ($ldelim = $ldelim_orig) =~ s/\s+$//; + ($rdelim = $ldelim) =~ tr/</>/; + $seq = Pod::InteriorSequence->new( + -name => $cmd, + -ldelim => $ldelim_orig, -rdelim => $rdelim, + -file => $file, -line => $line + ); + (@seq_stack > 1) and $seq->nested($seq_stack[-1]); + push @seq_stack, $seq; + } + ## Look for sequence ending + elsif ( @seq_stack > 1 ) { + ## Make sure we match the right kind of closing delimiter + my ($seq_end, $post_seq) = ('', ''); + if ( ($ldelim eq '<' and /\A(.*?)(>)/s) + or /\A(.*?)(\s+$rdelim)/s ) + { + ## Found end-of-sequence, capture the interior and the + ## closing the delimiter, and put the rest back on the + ## token-list + $post_seq = substr($_, length($1) + length($2)); + ($_, $seq_end) = ($1, $2); + (length $post_seq) and unshift @tokens, $post_seq; + } + if (length) { + ## In the middle of a sequence, append this text to it, and + ## dont forget to "expand" it if that's what the caller wanted + $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); + $_ .= $seq_end; + } + if (length $seq_end) { + ## End of current sequence, record terminating delimiter + $seq->rdelim($seq_end); + ## Pop it off the stack of "in progress" sequences + pop @seq_stack; + ## Append result to its parent in current parse tree + $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) + : $seq); + ## Remember the current cmd-name and left-delimiter + if(@seq_stack > 1) { + $cmd = $seq_stack[-1]->name; + $ldelim = $seq_stack[-1]->ldelim; + $rdelim = $seq_stack[-1]->rdelim; + } else { + $cmd = $ldelim = $rdelim = ''; + } + } + } + elsif (length) { + ## In the middle of a sequence, append this text to it, and + ## dont forget to "expand" it if that's what the caller wanted + $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_); + } + ## Keep track of line count + $line += s/\r*\n//; + ## Remember the "current" sequence + $seq = $seq_stack[-1]; + } + + ## Handle unterminated sequences + my $errorsub = (@seq_stack > 1) ? $self->errorsub() : undef; + while (@seq_stack > 1) { + ($cmd, $file, $line) = ($seq->name, $seq->file_line); + $ldelim = $seq->ldelim; + ($rdelim = $ldelim) =~ tr/</>/; + $rdelim =~ s/^(\S+)(\s*)$/$2$1/; + pop @seq_stack; + my $errmsg = "*** ERROR: unterminated ${cmd}${ldelim}...${rdelim}". + " at line $line in file $file\n"; + (ref $errorsub) and &{$errorsub}($errmsg) + or (defined $errorsub) and $self->$errorsub($errmsg) + or carp($errmsg); + $seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) : $seq); + $seq = $seq_stack[-1]; + } + + ## Return the resulting parse-tree + my $ptree = (pop @seq_stack)->parse_tree; + return $expand_ptree ? &$xptree_sub($self, $ptree) : $ptree; +} + +##--------------------------------------------------------------------------- + +=head1 B<interpolate()> + + $textblock = $parser->interpolate($text, $line_num); + +This method translates all text (including any embedded interior sequences) +in the given text string C<$text> and returns the interpolated result. The +parameter C<$line_num> is the line number corresponding to the beginning +of C<$text>. + +B<interpolate()> merely invokes a private method to recursively expand +nested interior sequences in bottom-up order (innermost sequences are +expanded first). If there is a need to expand nested sequences in +some alternate order, use B<parse_text> instead. + +=cut + +sub interpolate { + my($self, $text, $line_num) = @_; + my %parse_opts = ( -expand_seq => 'interior_sequence' ); + my $ptree = $self->parse_text( \%parse_opts, $text, $line_num ); + return join '', $ptree->children(); +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<parse_paragraph()> + + $parser->parse_paragraph($text, $line_num); + +This method takes the text of a POD paragraph to be processed, along +with its corresponding line number, and invokes the appropriate method +(one of B<command()>, B<verbatim()>, or B<textblock()>). + +For performance reasons, this method is invoked directly without any +dynamic lookup; Hence subclasses may I<not> override it! + +=end __PRIVATE__ + +=cut + +sub parse_paragraph { + my ($self, $text, $line_num) = @_; + local *myData = $self; ## alias to avoid deref-ing overhead + local *myOpts = ($myData{_PARSEOPTS} ||= {}); ## get parse-options + local $_; + + ## See if we want to preprocess nonPOD paragraphs as well as POD ones. + my $wantNonPods = $myOpts{'-want_nonPODs'}; + + ## Update cutting status + $myData{_CUTTING} = 0 if $text =~ /^={1,2}\S/; + + ## Perform any desired preprocessing if we wanted it this early + $wantNonPods and $text = $self->preprocess_paragraph($text, $line_num); + + ## Ignore up until next POD directive if we are cutting + return if $myData{_CUTTING}; + + ## Now we know this is block of text in a POD section! + + ##----------------------------------------------------------------- + ## This is a hook (hack ;-) for Pod::Select to do its thing without + ## having to override methods, but also without Pod::Parser assuming + ## $self is an instance of Pod::Select (if the _SELECTED_SECTIONS + ## field exists then we assume there is an is_selected() method for + ## us to invoke (calling $self->can('is_selected') could verify this + ## but that is more overhead than I want to incur) + ##----------------------------------------------------------------- + + ## Ignore this block if it isnt in one of the selected sections + if (exists $myData{_SELECTED_SECTIONS}) { + $self->is_selected($text) or return ($myData{_CUTTING} = 1); + } + + ## If we havent already, perform any desired preprocessing and + ## then re-check the "cutting" state + unless ($wantNonPods) { + $text = $self->preprocess_paragraph($text, $line_num); + return 1 unless ((defined $text) and (length $text)); + return 1 if ($myData{_CUTTING}); + } + + ## Look for one of the three types of paragraphs + my ($pfx, $cmd, $arg, $sep) = ('', '', '', ''); + my $pod_para = undef; + if ($text =~ /^(={1,2})(?=\S)/) { + ## Looks like a command paragraph. Capture the command prefix used + ## ("=" or "=="), as well as the command-name, its paragraph text, + ## and whatever sequence of characters was used to separate them + $pfx = $1; + $_ = substr($text, length $pfx); + ($cmd, $sep, $text) = split /(\s+)/, $_, 2; + ## If this is a "cut" directive then we dont need to do anything + ## except return to "cutting" mode. + if ($cmd eq 'cut') { + $myData{_CUTTING} = 1; + return unless $myOpts{'-process_cut_cmd'}; + } + } + ## Save the attributes indicating how the command was specified. + $pod_para = new Pod::Paragraph( + -name => $cmd, + -text => $text, + -prefix => $pfx, + -separator => $sep, + -file => $myData{_INFILE}, + -line => $line_num + ); + # ## Invoke appropriate callbacks + # if (exists $myData{_CALLBACKS}) { + # ## Look through the callback list, invoke callbacks, + # ## then see if we need to do the default actions + # ## (invoke_callbacks will return true if we do). + # return 1 unless $self->invoke_callbacks($cmd, $text, $line_num, $pod_para); + # } + + # If the last paragraph ended in whitespace, and we're not between verbatim blocks, carp + if ($myData{_WHITESPACE} and $myOpts{'-warnings'} + and not ($text =~ /^\s+/ and ($myData{_PREVIOUS}||"") eq "verbatim")) { + my $errorsub = $self->errorsub(); + my $line = $line_num - 1; + my $errmsg = "*** WARNING: line containing nothing but whitespace". + " in paragraph at line $line in file $myData{_INFILE}\n"; + (ref $errorsub) and &{$errorsub}($errmsg) + or (defined $errorsub) and $self->$errorsub($errmsg) + or carp($errmsg); + } + + if (length $cmd) { + ## A command paragraph + $self->command($cmd, $text, $line_num, $pod_para); + $myData{_PREVIOUS} = $cmd; + } + elsif ($text =~ /^\s+/) { + ## Indented text - must be a verbatim paragraph + $self->verbatim($text, $line_num, $pod_para); + $myData{_PREVIOUS} = "verbatim"; + } + else { + ## Looks like an ordinary block of text + $self->textblock($text, $line_num, $pod_para); + $myData{_PREVIOUS} = "textblock"; + } + + # Update the whitespace for the next time around + $myData{_WHITESPACE} = $text =~ /^[^\S\r\n]+\Z/m ? 1 : 0; + + return 1; +} + +##--------------------------------------------------------------------------- + +=head1 B<parse_from_filehandle()> + + $parser->parse_from_filehandle($in_fh,$out_fh); + +This method takes an input filehandle (which is assumed to already be +opened for reading) and reads the entire input stream looking for blocks +(paragraphs) of POD documentation to be processed. If no first argument +is given the default input filehandle C<STDIN> is used. + +The C<$in_fh> parameter may be any object that provides a B<getline()> +method to retrieve a single line of input text (hence, an appropriate +wrapper object could be used to parse PODs from a single string or an +array of strings). + +Using C<$in_fh-E<gt>getline()>, input is read line-by-line and assembled +into paragraphs or "blocks" (which are separated by lines containing +nothing but whitespace). For each block of POD documentation +encountered it will invoke a method to parse the given paragraph. + +If a second argument is given then it should correspond to a filehandle where +output should be sent (otherwise the default output filehandle is +C<STDOUT> if no output filehandle is currently in use). + +B<NOTE:> For performance reasons, this method caches the input stream at +the top of the stack in a local variable. Any attempts by clients to +change the stack contents during processing when in the midst executing +of this method I<will not affect> the input stream used by the current +invocation of this method. + +This method does I<not> usually need to be overridden by subclasses. + +=cut + +sub parse_from_filehandle { + my $self = shift; + my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : (); + my ($in_fh, $out_fh) = @_; + $in_fh = \*STDIN unless ($in_fh); + local *myData = $self; ## alias to avoid deref-ing overhead + local *myOpts = ($myData{_PARSEOPTS} ||= {}); ## get parse-options + local $_; + + ## Put this stream at the top of the stack and do beginning-of-input + ## processing. NOTE that $in_fh might be reset during this process. + my $topstream = $self->_push_input_stream($in_fh, $out_fh); + (exists $opts{-cutting}) and $self->cutting( $opts{-cutting} ); + + ## Initialize line/paragraph + my ($textline, $paragraph) = ('', ''); + my ($nlines, $plines) = (0, 0); + + ## Use <$fh> instead of $fh->getline where possible (for speed) + $_ = ref $in_fh; + my $tied_fh = (/^(?:GLOB|FileHandle|IO::\w+)$/ or tied $in_fh); + + ## Read paragraphs line-by-line + while (defined ($textline = $tied_fh ? <$in_fh> : $in_fh->getline)) { + $textline = $self->preprocess_line($textline, ++$nlines); + next unless ((defined $textline) && (length $textline)); + + if ((! length $paragraph) && ($textline =~ /^==/)) { + ## '==' denotes a one-line command paragraph + $paragraph = $textline; + $plines = 1; + $textline = ''; + } else { + ## Append this line to the current paragraph + $paragraph .= $textline; + ++$plines; + } + + ## See if this line is blank and ends the current paragraph. + ## If it isnt, then keep iterating until it is. + next unless (($textline =~ /^([^\S\r\n]*)[\r\n]*$/) + && (length $paragraph)); + + ## Now process the paragraph + parse_paragraph($self, $paragraph, ($nlines - $plines) + 1); + $paragraph = ''; + $plines = 0; + } + ## Dont forget about the last paragraph in the file + if (length $paragraph) { + parse_paragraph($self, $paragraph, ($nlines - $plines) + 1) + } + + ## Now pop the input stream off the top of the input stack. + $self->_pop_input_stream(); +} + +##--------------------------------------------------------------------------- + +=head1 B<parse_from_file()> + + $parser->parse_from_file($filename,$outfile); + +This method takes a filename and does the following: + +=over 2 + +=item * + +opens the input and output files for reading +(creating the appropriate filehandles) + +=item * + +invokes the B<parse_from_filehandle()> method passing it the +corresponding input and output filehandles. + +=item * + +closes the input and output files. + +=back + +If the special input filename "-" or "<&STDIN" is given then the STDIN +filehandle is used for input (and no open or close is performed). If no +input filename is specified then "-" is implied. Filehandle references, +or objects that support the regular IO operations (like C<E<lt>$fhE<gt>> +or C<$fh-<Egt>getline>) are also accepted; the handles must already be +opened. + +If a second argument is given then it should be the name of the desired +output file. If the special output filename "-" or ">&STDOUT" is given +then the STDOUT filehandle is used for output (and no open or close is +performed). If the special output filename ">&STDERR" is given then the +STDERR filehandle is used for output (and no open or close is +performed). If no output filehandle is currently in use and no output +filename is specified, then "-" is implied. +Alternatively, filehandle references or objects that support the regular +IO operations (like C<print>, e.g. L<IO::String>) are also accepted; +the object must already be opened. + +This method does I<not> usually need to be overridden by subclasses. + +=cut + +sub parse_from_file { + my $self = shift; + my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : (); + my ($infile, $outfile) = @_; + my ($in_fh, $out_fh); + if ($] < 5.006) { + ($in_fh, $out_fh) = (gensym(), gensym()); + } + my ($close_input, $close_output) = (0, 0); + local *myData = $self; + local *_; + + ## Is $infile a filename or a (possibly implied) filehandle + if (defined $infile && ref $infile) { + if (ref($infile) =~ /^(SCALAR|ARRAY|HASH|CODE|REF)$/) { + croak "Input from $1 reference not supported!\n"; + } + ## Must be a filehandle-ref (or else assume its a ref to an object + ## that supports the common IO read operations). + $myData{_INFILE} = ${$infile}; + $in_fh = $infile; + } + elsif (!defined($infile) || !length($infile) || ($infile eq '-') + || ($infile =~ /^<&(?:STDIN|0)$/i)) + { + ## Not a filename, just a string implying STDIN + $infile ||= '-'; + $myData{_INFILE} = '<standard input>'; + $in_fh = \*STDIN; + } + else { + ## We have a filename, open it for reading + $myData{_INFILE} = $infile; + open($in_fh, "< $infile") or + croak "Can't open $infile for reading: $!\n"; + $close_input = 1; + } + + ## NOTE: we need to be *very* careful when "defaulting" the output + ## file. We only want to use a default if this is the beginning of + ## the entire document (but *not* if this is an included file). We + ## determine this by seeing if the input stream stack has been set-up + ## already + + ## Is $outfile a filename, a (possibly implied) filehandle, maybe a ref? + if (ref $outfile) { + ## we need to check for ref() first, as other checks involve reading + if (ref($outfile) =~ /^(ARRAY|HASH|CODE)$/) { + croak "Output to $1 reference not supported!\n"; + } + elsif (ref($outfile) eq 'SCALAR') { +# # NOTE: IO::String isn't a part of the perl distribution, +# # so probably we shouldn't support this case... +# require IO::String; +# $myData{_OUTFILE} = "$outfile"; +# $out_fh = IO::String->new($outfile); + croak "Output to SCALAR reference not supported!\n"; + } + else { + ## Must be a filehandle-ref (or else assume its a ref to an + ## object that supports the common IO write operations). + $myData{_OUTFILE} = ${$outfile}; + $out_fh = $outfile; + } + } + elsif (!defined($outfile) || !length($outfile) || ($outfile eq '-') + || ($outfile =~ /^>&?(?:STDOUT|1)$/i)) + { + if (defined $myData{_TOP_STREAM}) { + $out_fh = $myData{_OUTPUT}; + } + else { + ## Not a filename, just a string implying STDOUT + $outfile ||= '-'; + $myData{_OUTFILE} = '<standard output>'; + $out_fh = \*STDOUT; + } + } + elsif ($outfile =~ /^>&(STDERR|2)$/i) { + ## Not a filename, just a string implying STDERR + $myData{_OUTFILE} = '<standard error>'; + $out_fh = \*STDERR; + } + else { + ## We have a filename, open it for writing + $myData{_OUTFILE} = $outfile; + (-d $outfile) and croak "$outfile is a directory, not POD input!\n"; + open($out_fh, "> $outfile") or + croak "Can't open $outfile for writing: $!\n"; + $close_output = 1; + } + + ## Whew! That was a lot of work to set up reasonably/robust behavior + ## in the case of a non-filename for reading and writing. Now we just + ## have to parse the input and close the handles when we're finished. + $self->parse_from_filehandle(\%opts, $in_fh, $out_fh); + + $close_input and + close($in_fh) || croak "Can't close $infile after reading: $!\n"; + $close_output and + close($out_fh) || croak "Can't close $outfile after writing: $!\n"; +} + +############################################################################# + +=head1 ACCESSOR METHODS + +Clients of B<Pod::Parser> should use the following methods to access +instance data fields: + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<errorsub()> + + $parser->errorsub("method_name"); + $parser->errorsub(\&warn_user); + $parser->errorsub(sub { print STDERR, @_ }); + +Specifies the method or subroutine to use when printing error messages +about POD syntax. The supplied method/subroutine I<must> return TRUE upon +successful printing of the message. If C<undef> is given, then the B<carp> +builtin is used to issue error messages (this is the default behavior). + + my $errorsub = $parser->errorsub() + my $errmsg = "This is an error message!\n" + (ref $errorsub) and &{$errorsub}($errmsg) + or (defined $errorsub) and $parser->$errorsub($errmsg) + or carp($errmsg); + +Returns a method name, or else a reference to the user-supplied subroutine +used to print error messages. Returns C<undef> if the B<carp> builtin +is used to issue error messages (this is the default behavior). + +=cut + +sub errorsub { + return (@_ > 1) ? ($_[0]->{_ERRORSUB} = $_[1]) : $_[0]->{_ERRORSUB}; +} + +##--------------------------------------------------------------------------- + +=head1 B<cutting()> + + $boolean = $parser->cutting(); + +Returns the current C<cutting> state: a boolean-valued scalar which +evaluates to true if text from the input file is currently being "cut" +(meaning it is I<not> considered part of the POD document). + + $parser->cutting($boolean); + +Sets the current C<cutting> state to the given value and returns the +result. + +=cut + +sub cutting { + return (@_ > 1) ? ($_[0]->{_CUTTING} = $_[1]) : $_[0]->{_CUTTING}; +} + +##--------------------------------------------------------------------------- + +##--------------------------------------------------------------------------- + +=head1 B<parseopts()> + +When invoked with no additional arguments, B<parseopts> returns a hashtable +of all the current parsing options. + + ## See if we are parsing non-POD sections as well as POD ones + my %opts = $parser->parseopts(); + $opts{'-want_nonPODs}' and print "-want_nonPODs\n"; + +When invoked using a single string, B<parseopts> treats the string as the +name of a parse-option and returns its corresponding value if it exists +(returns C<undef> if it doesn't). + + ## Did we ask to see '=cut' paragraphs? + my $want_cut = $parser->parseopts('-process_cut_cmd'); + $want_cut and print "-process_cut_cmd\n"; + +When invoked with multiple arguments, B<parseopts> treats them as +key/value pairs and the specified parse-option names are set to the +given values. Any unspecified parse-options are unaffected. + + ## Set them back to the default + $parser->parseopts(-warnings => 0); + +When passed a single hash-ref, B<parseopts> uses that hash to completely +reset the existing parse-options, all previous parse-option values +are lost. + + ## Reset all options to default + $parser->parseopts( { } ); + +See L<"PARSING OPTIONS"> for more information on the name and meaning of each +parse-option currently recognized. + +=cut + +sub parseopts { + local *myData = shift; + local *myOpts = ($myData{_PARSEOPTS} ||= {}); + return %myOpts if (@_ == 0); + if (@_ == 1) { + local $_ = shift; + return ref($_) ? $myData{_PARSEOPTS} = $_ : $myOpts{$_}; + } + my @newOpts = (%myOpts, @_); + $myData{_PARSEOPTS} = { @newOpts }; +} + +##--------------------------------------------------------------------------- + +=head1 B<output_file()> + + $fname = $parser->output_file(); + +Returns the name of the output file being written. + +=cut + +sub output_file { + return $_[0]->{_OUTFILE}; +} + +##--------------------------------------------------------------------------- + +=head1 B<output_handle()> + + $fhandle = $parser->output_handle(); + +Returns the output filehandle object. + +=cut + +sub output_handle { + return $_[0]->{_OUTPUT}; +} + +##--------------------------------------------------------------------------- + +=head1 B<input_file()> + + $fname = $parser->input_file(); + +Returns the name of the input file being read. + +=cut + +sub input_file { + return $_[0]->{_INFILE}; +} + +##--------------------------------------------------------------------------- + +=head1 B<input_handle()> + + $fhandle = $parser->input_handle(); + +Returns the current input filehandle object. + +=cut + +sub input_handle { + return $_[0]->{_INPUT}; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<input_streams()> + + $listref = $parser->input_streams(); + +Returns a reference to an array which corresponds to the stack of all +the input streams that are currently in the middle of being parsed. + +While parsing an input stream, it is possible to invoke +B<parse_from_file()> or B<parse_from_filehandle()> to parse a new input +stream and then return to parsing the previous input stream. Each input +stream to be parsed is pushed onto the end of this input stack +before any of its input is read. The input stream that is currently +being parsed is always at the end (or top) of the input stack. When an +input stream has been exhausted, it is popped off the end of the +input stack. + +Each element on this input stack is a reference to C<Pod::InputSource> +object. Please see L<Pod::InputObjects> for more details. + +This method might be invoked when printing diagnostic messages, for example, +to obtain the name and line number of the all input files that are currently +being processed. + +=end __PRIVATE__ + +=cut + +sub input_streams { + return $_[0]->{_INPUT_STREAMS}; +} + +##--------------------------------------------------------------------------- + +=begin __PRIVATE__ + +=head1 B<top_stream()> + + $hashref = $parser->top_stream(); + +Returns a reference to the hash-table that represents the element +that is currently at the top (end) of the input stream stack +(see L<"input_streams()">). The return value will be the C<undef> +if the input stack is empty. + +This method might be used when printing diagnostic messages, for example, +to obtain the name and line number of the current input file. + +=end __PRIVATE__ + +=cut + +sub top_stream { + return $_[0]->{_TOP_STREAM} || undef; +} + +############################################################################# + +=head1 PRIVATE METHODS AND DATA + +B<Pod::Parser> makes use of several internal methods and data fields +which clients should not need to see or use. For the sake of avoiding +name collisions for client data and methods, these methods and fields +are briefly discussed here. Determined hackers may obtain further +information about them by reading the B<Pod::Parser> source code. + +Private data fields are stored in the hash-object whose reference is +returned by the B<new()> constructor for this class. The names of all +private methods and data-fields used by B<Pod::Parser> begin with a +prefix of "_" and match the regular expression C</^_\w+$/>. + +=cut + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head1 B<_push_input_stream()> + + $hashref = $parser->_push_input_stream($in_fh,$out_fh); + +This method will push the given input stream on the input stack and +perform any necessary beginning-of-document or beginning-of-file +processing. The argument C<$in_fh> is the input stream filehandle to +push, and C<$out_fh> is the corresponding output filehandle to use (if +it is not given or is undefined, then the current output stream is used, +which defaults to standard output if it doesnt exist yet). + +The value returned will be reference to the hash-table that represents +the new top of the input stream stack. I<Please Note> that it is +possible for this method to use default values for the input and output +file handles. If this happens, you will need to look at the C<INPUT> +and C<OUTPUT> instance data members to determine their new values. + +=end _PRIVATE_ + +=cut + +sub _push_input_stream { + my ($self, $in_fh, $out_fh) = @_; + local *myData = $self; + + ## Initialize stuff for the entire document if this is *not* + ## an included file. + ## + ## NOTE: we need to be *very* careful when "defaulting" the output + ## filehandle. We only want to use a default value if this is the + ## beginning of the entire document (but *not* if this is an included + ## file). + unless (defined $myData{_TOP_STREAM}) { + $out_fh = \*STDOUT unless (defined $out_fh); + $myData{_CUTTING} = 1; ## current "cutting" state + $myData{_INPUT_STREAMS} = []; ## stack of all input streams + } + + ## Initialize input indicators + $myData{_OUTFILE} = '(unknown)' unless (defined $myData{_OUTFILE}); + $myData{_OUTPUT} = $out_fh if (defined $out_fh); + $in_fh = \*STDIN unless (defined $in_fh); + $myData{_INFILE} = '(unknown)' unless (defined $myData{_INFILE}); + $myData{_INPUT} = $in_fh; + my $input_top = $myData{_TOP_STREAM} + = new Pod::InputSource( + -name => $myData{_INFILE}, + -handle => $in_fh, + -was_cutting => $myData{_CUTTING} + ); + local *input_stack = $myData{_INPUT_STREAMS}; + push(@input_stack, $input_top); + + ## Perform beginning-of-document and/or beginning-of-input processing + $self->begin_pod() if (@input_stack == 1); + $self->begin_input(); + + return $input_top; +} + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head1 B<_pop_input_stream()> + + $hashref = $parser->_pop_input_stream(); + +This takes no arguments. It will perform any necessary end-of-file or +end-of-document processing and then pop the current input stream from +the top of the input stack. + +The value returned will be reference to the hash-table that represents +the new top of the input stream stack. + +=end _PRIVATE_ + +=cut + +sub _pop_input_stream { + my ($self) = @_; + local *myData = $self; + local *input_stack = $myData{_INPUT_STREAMS}; + + ## Perform end-of-input and/or end-of-document processing + $self->end_input() if (@input_stack > 0); + $self->end_pod() if (@input_stack == 1); + + ## Restore cutting state to whatever it was before we started + ## parsing this file. + my $old_top = pop(@input_stack); + $myData{_CUTTING} = $old_top->was_cutting(); + + ## Dont forget to reset the input indicators + my $input_top = undef; + if (@input_stack > 0) { + $input_top = $myData{_TOP_STREAM} = $input_stack[-1]; + $myData{_INFILE} = $input_top->name(); + $myData{_INPUT} = $input_top->handle(); + } else { + delete $myData{_TOP_STREAM}; + delete $myData{_INPUT_STREAMS}; + } + + return $input_top; +} + +############################################################################# + +=head1 TREE-BASED PARSING + +If straightforward stream-based parsing wont meet your needs (as is +likely the case for tasks such as translating PODs into structured +markup languages like HTML and XML) then you may need to take the +tree-based approach. Rather than doing everything in one pass and +calling the B<interpolate()> method to expand sequences into text, it +may be desirable to instead create a parse-tree using the B<parse_text()> +method to return a tree-like structure which may contain an ordered +list of children (each of which may be a text-string, or a similar +tree-like structure). + +Pay special attention to L<"METHODS FOR PARSING AND PROCESSING"> and +to the objects described in L<Pod::InputObjects>. The former describes +the gory details and parameters for how to customize and extend the +parsing behavior of B<Pod::Parser>. B<Pod::InputObjects> provides +several objects that may all be used interchangeably as parse-trees. The +most obvious one is the B<Pod::ParseTree> object. It defines the basic +interface and functionality that all things trying to be a POD parse-tree +should do. A B<Pod::ParseTree> is defined such that each "node" may be a +text-string, or a reference to another parse-tree. Each B<Pod::Paragraph> +object and each B<Pod::InteriorSequence> object also supports the basic +parse-tree interface. + +The B<parse_text()> method takes a given paragraph of text, and +returns a parse-tree that contains one or more children, each of which +may be a text-string, or an InteriorSequence object. There are also +callback-options that may be passed to B<parse_text()> to customize +the way it expands or transforms interior-sequences, as well as the +returned result. These callbacks can be used to create a parse-tree +with custom-made objects (which may or may not support the parse-tree +interface, depending on how you choose to do it). + +If you wish to turn an entire POD document into a parse-tree, that process +is fairly straightforward. The B<parse_text()> method is the key to doing +this successfully. Every paragraph-callback (i.e. the polymorphic methods +for B<command()>, B<verbatim()>, and B<textblock()> paragraphs) takes +a B<Pod::Paragraph> object as an argument. Each paragraph object has a +B<parse_tree()> method that can be used to get or set a corresponding +parse-tree. So for each of those paragraph-callback methods, simply call +B<parse_text()> with the options you desire, and then use the returned +parse-tree to assign to the given paragraph object. + +That gives you a parse-tree for each paragraph - so now all you need is +an ordered list of paragraphs. You can maintain that yourself as a data +element in the object/hash. The most straightforward way would be simply +to use an array-ref, with the desired set of custom "options" for each +invocation of B<parse_text>. Let's assume the desired option-set is +given by the hash C<%options>. Then we might do something like the +following: + + package MyPodParserTree; + + @ISA = qw( Pod::Parser ); + + ... + + sub begin_pod { + my $self = shift; + $self->{'-paragraphs'} = []; ## initialize paragraph list + } + + sub command { + my ($parser, $command, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({%options}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + push @{ $self->{'-paragraphs'} }, $pod_para; + } + + sub verbatim { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + push @{ $self->{'-paragraphs'} }, $pod_para; + } + + sub textblock { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({%options}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + push @{ $self->{'-paragraphs'} }, $pod_para; + } + + ... + + package main; + ... + my $parser = new MyPodParserTree(...); + $parser->parse_from_file(...); + my $paragraphs_ref = $parser->{'-paragraphs'}; + +Of course, in this module-author's humble opinion, I'd be more inclined to +use the existing B<Pod::ParseTree> object than a simple array. That way +everything in it, paragraphs and sequences, all respond to the same core +interface for all parse-tree nodes. The result would look something like: + + package MyPodParserTree2; + + ... + + sub begin_pod { + my $self = shift; + $self->{'-ptree'} = new Pod::ParseTree; ## initialize parse-tree + } + + sub parse_tree { + ## convenience method to get/set the parse-tree for the entire POD + (@_ > 1) and $_[0]->{'-ptree'} = $_[1]; + return $_[0]->{'-ptree'}; + } + + sub command { + my ($parser, $command, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + $parser->parse_tree()->append( $pod_para ); + } + + sub verbatim { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + $parser->parse_tree()->append( $pod_para ); + } + + sub textblock { + my ($parser, $paragraph, $line_num, $pod_para) = @_; + my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...); + $pod_para->parse_tree( $ptree ); + $parser->parse_tree()->append( $pod_para ); + } + + ... + + package main; + ... + my $parser = new MyPodParserTree2(...); + $parser->parse_from_file(...); + my $ptree = $parser->parse_tree; + ... + +Now you have the entire POD document as one great big parse-tree. You +can even use the B<-expand_seq> option to B<parse_text> to insert +whole different kinds of objects. Just don't expect B<Pod::Parser> +to know what to do with them after that. That will need to be in your +code. Or, alternatively, you can insert any object you like so long as +it conforms to the B<Pod::ParseTree> interface. + +One could use this to create subclasses of B<Pod::Paragraphs> and +B<Pod::InteriorSequences> for specific commands (or to create your own +custom node-types in the parse-tree) and add some kind of B<emit()> +method to each custom node/subclass object in the tree. Then all you'd +need to do is recursively walk the tree in the desired order, processing +the children (most likely from left to right) by formatting them if +they are text-strings, or by calling their B<emit()> method if they +are objects/references. + +=head1 CAVEATS + +Please note that POD has the notion of "paragraphs": this is something +starting I<after> a blank (read: empty) line, with the single exception +of the file start, which is also starting a paragraph. That means that +especially a command (e.g. C<=head1>) I<must> be preceded with a blank +line; C<__END__> is I<not> a blank line. + +=head1 SEE ALSO + +L<Pod::InputObjects>, L<Pod::Select> + +B<Pod::InputObjects> defines POD input objects corresponding to +command paragraphs, parse-trees, and interior-sequences. + +B<Pod::Select> is a subclass of B<Pod::Parser> which provides the ability +to selectively include and/or exclude sections of a POD document from being +translated based upon the current heading, subheading, subsubheading, etc. + +=for __PRIVATE__ +B<Pod::Callbacks> is a subclass of B<Pod::Parser> which gives its users +the ability the employ I<callback functions> instead of, or in addition +to, overriding methods of the base class. + +=for __PRIVATE__ +B<Pod::Select> and B<Pod::Callbacks> do not override any +methods nor do they define any new methods with the same name. Because +of this, they may I<both> be used (in combination) as a base class of +the same subclass in order to combine their functionality without +causing any namespace clashes due to multiple inheritance. + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<Pod::Text> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=head1 LICENSE + +Pod-Parser is free software; you can redistribute it and/or modify it +under the terms of the Artistic License distributed with Perl version +5.000 or (at your option) any later version. Please refer to the +Artistic License that came with your Perl distribution for more +details. If your version of Perl was not distributed under the +terms of the Artistic License, than you may distribute PodParser +under the same terms as Perl itself. + +=cut + +1; +# vim: ts=4 sw=4 et diff --git a/ext/Pod-Parser/lib/Pod/PlainText.pm b/ext/Pod-Parser/lib/Pod/PlainText.pm new file mode 100644 index 0000000000..0dce1799b2 --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/PlainText.pm @@ -0,0 +1,738 @@ +# Pod::PlainText -- Convert POD data to formatted ASCII text. +# $Id: Text.pm,v 2.1 1999/09/20 11:53:33 eagle Exp $ +# +# Copyright 1999-2000 by Russ Allbery <rra@stanford.edu> +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. +# +# This module is intended to be a replacement for Pod::Text, and attempts to +# match its output except for some specific circumstances where other +# decisions seemed to produce better output. It uses Pod::Parser and is +# designed to be very easy to subclass. + +############################################################################ +# Modules and declarations +############################################################################ + +package Pod::PlainText; +use strict; + +require 5.005; + +use Carp qw(carp croak); +use Pod::Select (); + +use vars qw(@ISA %ESCAPES $VERSION); + +# We inherit from Pod::Select instead of Pod::Parser so that we can be used +# by Pod::Usage. +@ISA = qw(Pod::Select); + +$VERSION = '2.04'; + +BEGIN { + if ($] < 5.006) { + require Symbol; + import Symbol; + } +} + +############################################################################ +# Table of supported E<> escapes +############################################################################ + +# This table is taken near verbatim from Pod::PlainText in Pod::Parser, +# which got it near verbatim from the original Pod::Text. It is therefore +# credited to Tom Christiansen, and I'm glad I didn't have to write it. :) +%ESCAPES = ( + 'amp' => '&', # ampersand + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + + "Aacute" => "\xC1", # capital A, acute accent + "aacute" => "\xE1", # small a, acute accent + "Acirc" => "\xC2", # capital A, circumflex accent + "acirc" => "\xE2", # small a, circumflex accent + "AElig" => "\xC6", # capital AE diphthong (ligature) + "aelig" => "\xE6", # small ae diphthong (ligature) + "Agrave" => "\xC0", # capital A, grave accent + "agrave" => "\xE0", # small a, grave accent + "Aring" => "\xC5", # capital A, ring + "aring" => "\xE5", # small a, ring + "Atilde" => "\xC3", # capital A, tilde + "atilde" => "\xE3", # small a, tilde + "Auml" => "\xC4", # capital A, dieresis or umlaut mark + "auml" => "\xE4", # small a, dieresis or umlaut mark + "Ccedil" => "\xC7", # capital C, cedilla + "ccedil" => "\xE7", # small c, cedilla + "Eacute" => "\xC9", # capital E, acute accent + "eacute" => "\xE9", # small e, acute accent + "Ecirc" => "\xCA", # capital E, circumflex accent + "ecirc" => "\xEA", # small e, circumflex accent + "Egrave" => "\xC8", # capital E, grave accent + "egrave" => "\xE8", # small e, grave accent + "ETH" => "\xD0", # capital Eth, Icelandic + "eth" => "\xF0", # small eth, Icelandic + "Euml" => "\xCB", # capital E, dieresis or umlaut mark + "euml" => "\xEB", # small e, dieresis or umlaut mark + "Iacute" => "\xCD", # capital I, acute accent + "iacute" => "\xED", # small i, acute accent + "Icirc" => "\xCE", # capital I, circumflex accent + "icirc" => "\xEE", # small i, circumflex accent + "Igrave" => "\xCD", # capital I, grave accent + "igrave" => "\xED", # small i, grave accent + "Iuml" => "\xCF", # capital I, dieresis or umlaut mark + "iuml" => "\xEF", # small i, dieresis or umlaut mark + "Ntilde" => "\xD1", # capital N, tilde + "ntilde" => "\xF1", # small n, tilde + "Oacute" => "\xD3", # capital O, acute accent + "oacute" => "\xF3", # small o, acute accent + "Ocirc" => "\xD4", # capital O, circumflex accent + "ocirc" => "\xF4", # small o, circumflex accent + "Ograve" => "\xD2", # capital O, grave accent + "ograve" => "\xF2", # small o, grave accent + "Oslash" => "\xD8", # capital O, slash + "oslash" => "\xF8", # small o, slash + "Otilde" => "\xD5", # capital O, tilde + "otilde" => "\xF5", # small o, tilde + "Ouml" => "\xD6", # capital O, dieresis or umlaut mark + "ouml" => "\xF6", # small o, dieresis or umlaut mark + "szlig" => "\xDF", # small sharp s, German (sz ligature) + "THORN" => "\xDE", # capital THORN, Icelandic + "thorn" => "\xFE", # small thorn, Icelandic + "Uacute" => "\xDA", # capital U, acute accent + "uacute" => "\xFA", # small u, acute accent + "Ucirc" => "\xDB", # capital U, circumflex accent + "ucirc" => "\xFB", # small u, circumflex accent + "Ugrave" => "\xD9", # capital U, grave accent + "ugrave" => "\xF9", # small u, grave accent + "Uuml" => "\xDC", # capital U, dieresis or umlaut mark + "uuml" => "\xFC", # small u, dieresis or umlaut mark + "Yacute" => "\xDD", # capital Y, acute accent + "yacute" => "\xFD", # small y, acute accent + "yuml" => "\xFF", # small y, dieresis or umlaut mark + + "lchevron" => "\xAB", # left chevron (double less than) + "rchevron" => "\xBB", # right chevron (double greater than) +); + + +############################################################################ +# Initialization +############################################################################ + +# Initialize the object. Must be sure to call our parent initializer. +sub initialize { + my $self = shift; + + $$self{alt} = 0 unless defined $$self{alt}; + $$self{indent} = 4 unless defined $$self{indent}; + $$self{loose} = 0 unless defined $$self{loose}; + $$self{sentence} = 0 unless defined $$self{sentence}; + $$self{width} = 76 unless defined $$self{width}; + + $$self{INDENTS} = []; # Stack of indentations. + $$self{MARGIN} = $$self{indent}; # Current left margin in spaces. + + return $self->SUPER::initialize; +} + + +############################################################################ +# Core overrides +############################################################################ + +# Called for each command paragraph. Gets the command, the associated +# paragraph, the line number, and a Pod::Paragraph object. Just dispatches +# the command to a method named the same as the command. =cut is handled +# internally by Pod::Parser. +sub command { + my $self = shift; + my $command = shift; + return if $command eq 'pod'; + return if ($$self{EXCLUDE} && $command ne 'end'); + if (defined $$self{ITEM}) { + $self->item ("\n"); + local $_ = "\n"; + $self->output($_) if($command eq 'back'); + } + $command = 'cmd_' . $command; + return $self->$command (@_); +} + +# Called for a verbatim paragraph. Gets the paragraph, the line number, and +# a Pod::Paragraph object. Just output it verbatim, but with tabs converted +# to spaces. +sub verbatim { + my $self = shift; + return if $$self{EXCLUDE}; + $self->item if defined $$self{ITEM}; + local $_ = shift; + return if /^\s*$/; + s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme; + return $self->output($_); +} + +# Called for a regular text block. Gets the paragraph, the line number, and +# a Pod::Paragraph object. Perform interpolation and output the results. +sub textblock { + my $self = shift; + return if $$self{EXCLUDE}; + if($$self{VERBATIM}) { + $self->output($_[0]); + return; + } + local $_ = shift; + my $line = shift; + + # Perform a little magic to collapse multiple L<> references. This is + # here mostly for backwards-compatibility. We'll just rewrite the whole + # thing into actual text at this part, bypassing the whole internal + # sequence parsing thing. + s{ + ( + L< # A link of the form L</something>. + / + ( + [:\w]+ # The item has to be a simple word... + (\(\))? # ...or simple function. + ) + > + ( + ,?\s+(and\s+)? # Allow lots of them, conjuncted. + L< + / + ( + [:\w]+ + (\(\))? + ) + > + )+ + ) + } { + local $_ = $1; + s%L</([^>]+)>%$1%g; + my @items = split /(?:,?\s+(?:and\s+)?)/; + my $string = "the "; + my $i; + for ($i = 0; $i < @items; $i++) { + $string .= $items[$i]; + $string .= ", " if @items > 2 && $i != $#items; + $string .= " and " if ($i == $#items - 1); + } + $string .= " entries elsewhere in this document"; + $string; + }gex; + + # Now actually interpolate and output the paragraph. + $_ = $self->interpolate ($_, $line); + s/\s*$/\n/s; + if (defined $$self{ITEM}) { + $self->item ($_ . "\n"); + } else { + $self->output ($self->reformat ($_ . "\n")); + } +} + +# Called for an interior sequence. Gets the command, argument, and a +# Pod::InteriorSequence object and is expected to return the resulting text. +# Calls code, bold, italic, file, and link to handle those types of +# sequences, and handles S<>, E<>, X<>, and Z<> directly. +sub interior_sequence { + my $self = shift; + my $command = shift; + local $_ = shift; + return '' if ($command eq 'X' || $command eq 'Z'); + + # Expand escapes into the actual character now, carping if invalid. + if ($command eq 'E') { + return $ESCAPES{$_} if defined $ESCAPES{$_}; + carp "Unknown escape: E<$_>"; + return "E<$_>"; + } + + # For all the other sequences, empty content produces no output. + return if $_ eq ''; + + # For S<>, compress all internal whitespace and then map spaces to \01. + # When we output the text, we'll map this back. + if ($command eq 'S') { + s/\s{2,}/ /g; + tr/ /\01/; + return $_; + } + + # Anything else needs to get dispatched to another method. + if ($command eq 'B') { return $self->seq_b ($_) } + elsif ($command eq 'C') { return $self->seq_c ($_) } + elsif ($command eq 'F') { return $self->seq_f ($_) } + elsif ($command eq 'I') { return $self->seq_i ($_) } + elsif ($command eq 'L') { return $self->seq_l ($_) } + else { carp "Unknown sequence $command<$_>" } +} + +# Called for each paragraph that's actually part of the POD. We take +# advantage of this opportunity to untabify the input. +sub preprocess_paragraph { + my $self = shift; + local $_ = shift; + 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me; + return $_; +} + + +############################################################################ +# Command paragraphs +############################################################################ + +# All command paragraphs take the paragraph and the line number. + +# First level heading. +sub cmd_head1 { + my $self = shift; + local $_ = shift; + s/\s+$//s; + $_ = $self->interpolate ($_, shift); + if ($$self{alt}) { + $self->output ("\n==== $_ ====\n\n"); + } else { + $_ .= "\n" if $$self{loose}; + $self->output ($_ . "\n"); + } +} + +# Second level heading. +sub cmd_head2 { + my $self = shift; + local $_ = shift; + s/\s+$//s; + $_ = $self->interpolate ($_, shift); + if ($$self{alt}) { + $self->output ("\n== $_ ==\n\n"); + } else { + $_ .= "\n" if $$self{loose}; + $self->output (' ' x ($$self{indent} / 2) . $_ . "\n"); + } +} + +# third level heading - not strictly perlpodspec compliant +sub cmd_head3 { + my $self = shift; + local $_ = shift; + s/\s+$//s; + $_ = $self->interpolate ($_, shift); + if ($$self{alt}) { + $self->output ("\n= $_ =\n"); + } else { + $_ .= "\n" if $$self{loose}; + $self->output (' ' x ($$self{indent}) . $_ . "\n"); + } +} + +# fourth level heading - not strictly perlpodspec compliant +# just like head3 +*cmd_head4 = \&cmd_head3; + +# Start a list. +sub cmd_over { + my $self = shift; + local $_ = shift; + unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} } + push (@{ $$self{INDENTS} }, $$self{MARGIN}); + $$self{MARGIN} += ($_ + 0); +} + +# End a list. +sub cmd_back { + my $self = shift; + $$self{MARGIN} = pop @{ $$self{INDENTS} }; + unless (defined $$self{MARGIN}) { + carp 'Unmatched =back'; + $$self{MARGIN} = $$self{indent}; + } +} + +# An individual list item. +sub cmd_item { + my $self = shift; + if (defined $$self{ITEM}) { $self->item } + local $_ = shift; + s/\s+$//s; + $$self{ITEM} = $self->interpolate ($_); +} + +# Begin a block for a particular translator. Setting VERBATIM triggers +# special handling in textblock(). +sub cmd_begin { + my $self = shift; + local $_ = shift; + my ($kind) = /^(\S+)/ or return; + if ($kind eq 'text') { + $$self{VERBATIM} = 1; + } else { + $$self{EXCLUDE} = 1; + } +} + +# End a block for a particular translator. We assume that all =begin/=end +# pairs are properly closed. +sub cmd_end { + my $self = shift; + $$self{EXCLUDE} = 0; + $$self{VERBATIM} = 0; +} + +# One paragraph for a particular translator. Ignore it unless it's intended +# for text, in which case we treat it as a verbatim text block. +sub cmd_for { + my $self = shift; + local $_ = shift; + my $line = shift; + return unless s/^text\b[ \t]*\n?//; + $self->verbatim ($_, $line); +} + + +############################################################################ +# Interior sequences +############################################################################ + +# The simple formatting ones. These are here mostly so that subclasses can +# override them and do more complicated things. +sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] } +sub seq_c { return $_[0]{alt} ? "``$_[1]''" : "`$_[1]'" } +sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] } +sub seq_i { return '*' . $_[1] . '*' } + +# The complicated one. Handle links. Since this is plain text, we can't +# actually make any real links, so this is all to figure out what text we +# print out. +sub seq_l { + my $self = shift; + local $_ = shift; + + # Smash whitespace in case we were split across multiple lines. + s/\s+/ /g; + + # If we were given any explicit text, just output it. + if (/^([^|]+)\|/) { return $1 } + + # Okay, leading and trailing whitespace isn't important; get rid of it. + s/^\s+//; + s/\s+$//; + + # Default to using the whole content of the link entry as a section + # name. Note that L<manpage/> forces a manpage interpretation, as does + # something looking like L<manpage(section)>. The latter is an + # enhancement over the original Pod::Text. + my ($manpage, $section) = ('', $_); + if (/^(?:https?|ftp|news):/) { + # a URL + return $_; + } elsif (/^"\s*(.*?)\s*"$/) { + $section = '"' . $1 . '"'; + } elsif (m/^[-:.\w]+(?:\(\S+\))?$/) { + ($manpage, $section) = ($_, ''); + } elsif (m{/}) { + ($manpage, $section) = split (/\s*\/\s*/, $_, 2); + } + + my $text = ''; + # Now build the actual output text. + if (!length $section) { + $text = "the $manpage manpage" if length $manpage; + } elsif ($section =~ /^[:\w]+(?:\(\))?/) { + $text .= 'the ' . $section . ' entry'; + $text .= (length $manpage) ? " in the $manpage manpage" + : ' elsewhere in this document'; + } else { + $section =~ s/^\"\s*//; + $section =~ s/\s*\"$//; + $text .= 'the section on "' . $section . '"'; + $text .= " in the $manpage manpage" if length $manpage; + } + return $text; +} + + +############################################################################ +# List handling +############################################################################ + +# This method is called whenever an =item command is complete (in other +# words, we've seen its associated paragraph or know for certain that it +# doesn't have one). It gets the paragraph associated with the item as an +# argument. If that argument is empty, just output the item tag; if it +# contains a newline, output the item tag followed by the newline. +# Otherwise, see if there's enough room for us to output the item tag in the +# margin of the text or if we have to put it on a separate line. +sub item { + my $self = shift; + local $_ = shift; + my $tag = $$self{ITEM}; + unless (defined $tag) { + carp 'item called without tag'; + return; + } + undef $$self{ITEM}; + my $indent = $$self{INDENTS}[-1]; + unless (defined $indent) { $indent = $$self{indent} } + my $space = ' ' x $indent; + $space =~ s/^ /:/ if $$self{alt}; + if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) { + my $margin = $$self{MARGIN}; + $$self{MARGIN} = $indent; + my $output = $self->reformat ($tag); + $output =~ s/\n*$/\n/; + $self->output ($output); + $$self{MARGIN} = $margin; + $self->output ($self->reformat ($_)) if /\S/; + } else { + $_ = $self->reformat ($_); + s/^ /:/ if ($$self{alt} && $indent > 0); + my $tagspace = ' ' x length $tag; + s/^($space)$tagspace/$1$tag/ or carp 'Bizarre space in item'; + $self->output ($_); + } +} + + +############################################################################ +# Output formatting +############################################################################ + +# Wrap a line, indenting by the current left margin. We can't use +# Text::Wrap because it plays games with tabs. We can't use formline, even +# though we'd really like to, because it screws up non-printing characters. +# So we have to do the wrapping ourselves. +sub wrap { + my $self = shift; + local $_ = shift; + my $output = ''; + my $spaces = ' ' x $$self{MARGIN}; + my $width = $$self{width} - $$self{MARGIN}; + while (length > $width) { + if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) { + $output .= $spaces . $1 . "\n"; + } else { + last; + } + } + $output .= $spaces . $_; + $output =~ s/\s+$/\n\n/; + return $output; +} + +# Reformat a paragraph of text for the current margin. Takes the text to +# reformat and returns the formatted text. +sub reformat { + my $self = shift; + local $_ = shift; + + # If we're trying to preserve two spaces after sentences, do some + # munging to support that. Otherwise, smash all repeated whitespace. + if ($$self{sentence}) { + s/ +$//mg; + s/\.\n/. \n/g; + s/\n/ /g; + s/ +/ /g; + } else { + s/\s+/ /g; + } + return $self->wrap($_); +} + +# Output text to the output device. +sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] } + + +############################################################################ +# Backwards compatibility +############################################################################ + +# The old Pod::Text module did everything in a pod2text() function. This +# tries to provide the same interface for legacy applications. +sub pod2text { + my @args; + + # This is really ugly; I hate doing option parsing in the middle of a + # module. But the old Pod::Text module supported passing flags to its + # entry function, so handle -a and -<number>. + while ($_[0] =~ /^-/) { + my $flag = shift; + if ($flag eq '-a') { push (@args, alt => 1) } + elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) } + else { + unshift (@_, $flag); + last; + } + } + + # Now that we know what arguments we're using, create the parser. + my $parser = Pod::PlainText->new (@args); + + # If two arguments were given, the second argument is going to be a file + # handle. That means we want to call parse_from_filehandle(), which + # means we need to turn the first argument into a file handle. Magic + # open will handle the <&STDIN case automagically. + if (defined $_[1]) { + my $infh; + if ($] < 5.006) { + $infh = gensym(); + } + unless (open ($infh, $_[0])) { + croak ("Can't open $_[0] for reading: $!\n"); + } + $_[0] = $infh; + return $parser->parse_from_filehandle (@_); + } else { + return $parser->parse_from_file (@_); + } +} + + +############################################################################ +# Module return value and documentation +############################################################################ + +1; +__END__ + +=head1 NAME + +Pod::PlainText - Convert POD data to formatted ASCII text + +=head1 SYNOPSIS + + use Pod::PlainText; + my $parser = Pod::PlainText->new (sentence => 0, width => 78); + + # Read POD from STDIN and write to STDOUT. + $parser->parse_from_filehandle; + + # Read POD from file.pod and write to file.txt. + $parser->parse_from_file ('file.pod', 'file.txt'); + +=head1 DESCRIPTION + +Pod::PlainText is a module that can convert documentation in the POD format (the +preferred language for documenting Perl) into formatted ASCII. It uses no +special formatting controls or codes whatsoever, and its output is therefore +suitable for nearly any device. + +As a derived class from Pod::Parser, Pod::PlainText supports the same methods and +interfaces. See L<Pod::Parser> for all the details; briefly, one creates a +new parser with C<Pod::PlainText-E<gt>new()> and then calls either +parse_from_filehandle() or parse_from_file(). + +new() can take options, in the form of key/value pairs, that control the +behavior of the parser. The currently recognized options are: + +=over 4 + +=item alt + +If set to a true value, selects an alternate output format that, among other +things, uses a different heading style and marks C<=item> entries with a +colon in the left margin. Defaults to false. + +=item indent + +The number of spaces to indent regular text, and the default indentation for +C<=over> blocks. Defaults to 4. + +=item loose + +If set to a true value, a blank line is printed after a C<=headN> headings. +If set to false (the default), no blank line is printed after C<=headN>. +This is the default because it's the expected formatting for manual pages; +if you're formatting arbitrary text documents, setting this to true may +result in more pleasing output. + +=item sentence + +If set to a true value, Pod::PlainText will assume that each sentence ends in two +spaces, and will try to preserve that spacing. If set to false, all +consecutive whitespace in non-verbatim paragraphs is compressed into a +single space. Defaults to true. + +=item width + +The column at which to wrap text on the right-hand side. Defaults to 76. + +=back + +The standard Pod::Parser method parse_from_filehandle() takes up to two +arguments, the first being the file handle to read POD from and the second +being the file handle to write the formatted output to. The first defaults +to STDIN if not given, and the second defaults to STDOUT. The method +parse_from_file() is almost identical, except that its two arguments are the +input and output disk files instead. See L<Pod::Parser> for the specific +details. + +=head1 DIAGNOSTICS + +=over 4 + +=item Bizarre space in item + +(W) Something has gone wrong in internal C<=item> processing. This message +indicates a bug in Pod::PlainText; you should never see it. + +=item Can't open %s for reading: %s + +(F) Pod::PlainText was invoked via the compatibility mode pod2text() interface +and the input file it was given could not be opened. + +=item Unknown escape: %s + +(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::PlainText didn't +know about. + +=item Unknown sequence: %s + +(W) The POD source contained a non-standard internal sequence (something of +the form C<XE<lt>E<gt>>) that Pod::PlainText didn't know about. + +=item Unmatched =back + +(W) Pod::PlainText encountered a C<=back> command that didn't correspond to an +C<=over> command. + +=back + +=head1 RESTRICTIONS + +Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on +output, due to an internal implementation detail. + +=head1 NOTES + +This is a replacement for an earlier Pod::Text module written by Tom +Christiansen. It has a revamped interface, since it now uses Pod::Parser, +but an interface roughly compatible with the old Pod::Text::pod2text() +function is still available. Please change to the new calling convention, +though. + +The original Pod::Text contained code to do formatting via termcap +sequences, although it wasn't turned on by default and it was problematic to +get it to work at all. This rewrite doesn't even try to do that, but a +subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>. + +=head1 SEE ALSO + +L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>, +pod2text(1) + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the +original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and +its conversion to Pod::Parser by Brad Appleton +E<lt>bradapp@enteract.comE<gt>. + +=cut diff --git a/ext/Pod-Parser/lib/Pod/Select.pm b/ext/Pod-Parser/lib/Pod/Select.pm new file mode 100644 index 0000000000..4724cb79cf --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/Select.pm @@ -0,0 +1,746 @@ +############################################################################# +# Pod/Select.pm -- function to select portions of POD docs +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Select; +use strict; + +use vars qw($VERSION @ISA @EXPORT $MAX_HEADING_LEVEL %myData @section_headings @selected_sections); +$VERSION = '1.36'; ## Current version of this package +require 5.005; ## requires this Perl version or later + +############################################################################# + +=head1 NAME + +Pod::Select, podselect() - extract selected sections of POD from input + +=head1 SYNOPSIS + + use Pod::Select; + + ## Select all the POD sections for each file in @filelist + ## and print the result on standard output. + podselect(@filelist); + + ## Same as above, but write to tmp.out + podselect({-output => "tmp.out"}, @filelist): + + ## Select from the given filelist, only those POD sections that are + ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. + podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist): + + ## Select the "DESCRIPTION" section of the PODs from STDIN and write + ## the result to STDERR. + podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN); + +or + + use Pod::Select; + + ## Create a parser object for selecting POD sections from the input + $parser = new Pod::Select(); + + ## Select all the POD sections for each file in @filelist + ## and print the result to tmp.out. + $parser->parse_from_file("<&STDIN", "tmp.out"); + + ## Select from the given filelist, only those POD sections that are + ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. + $parser->select("NAME|SYNOPSIS", "OPTIONS"); + for (@filelist) { $parser->parse_from_file($_); } + + ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from + ## STDIN and write the result to STDERR. + $parser->select("DESCRIPTION"); + $parser->add_selection("SEE ALSO"); + $parser->parse_from_filehandle(\*STDIN, \*STDERR); + +=head1 REQUIRES + +perl5.005, Pod::Parser, Exporter, Carp + +=head1 EXPORTS + +podselect() + +=head1 DESCRIPTION + +B<podselect()> is a function which will extract specified sections of +pod documentation from an input stream. This ability is provided by the +B<Pod::Select> module which is a subclass of B<Pod::Parser>. +B<Pod::Select> provides a method named B<select()> to specify the set of +POD sections to select for processing/printing. B<podselect()> merely +creates a B<Pod::Select> object and then invokes the B<podselect()> +followed by B<parse_from_file()>. + +=head1 SECTION SPECIFICATIONS + +B<podselect()> and B<Pod::Select::select()> may be given one or more +"section specifications" to restrict the text processed to only the +desired set of sections and their corresponding subsections. A section +specification is a string containing one or more Perl-style regular +expressions separated by forward slashes ("/"). If you need to use a +forward slash literally within a section title you can escape it with a +backslash ("\/"). + +The formal syntax of a section specification is: + +=over 4 + +=item * + +I<head1-title-regex>/I<head2-title-regex>/... + +=back + +Any omitted or empty regular expressions will default to ".*". +Please note that each regular expression given is implicitly +anchored by adding "^" and "$" to the beginning and end. Also, if a +given regular expression starts with a "!" character, then the +expression is I<negated> (so C<!foo> would match anything I<except> +C<foo>). + +Some example section specifications follow. + +=over 4 + +=item * + +Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections: + +C<NAME|SYNOPSIS> + +=item * + +Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION> +section: + +C<DESCRIPTION/Question|Answer> + +=item * + +Match the C<Comments> subsection of I<all> sections: + +C</Comments> + +=item * + +Match all subsections of C<DESCRIPTION> I<except> for C<Comments>: + +C<DESCRIPTION/!Comments> + +=item * + +Match the C<DESCRIPTION> section but do I<not> match any of its subsections: + +C<DESCRIPTION/!.+> + +=item * + +Match all top level sections but none of their subsections: + +C</!.+> + +=back + +=begin _NOT_IMPLEMENTED_ + +=head1 RANGE SPECIFICATIONS + +B<podselect()> and B<Pod::Select::select()> may be given one or more +"range specifications" to restrict the text processed to only the +desired ranges of paragraphs in the desired set of sections. A range +specification is a string containing a single Perl-style regular +expression (a regex), or else two Perl-style regular expressions +(regexs) separated by a ".." (Perl's "range" operator is ".."). +The regexs in a range specification are delimited by forward slashes +("/"). If you need to use a forward slash literally within a regex you +can escape it with a backslash ("\/"). + +The formal syntax of a range specification is: + +=over 4 + +=item * + +/I<start-range-regex>/[../I<end-range-regex>/] + +=back + +Where each the item inside square brackets (the ".." followed by the +end-range-regex) is optional. Each "range-regex" is of the form: + + =cmd-expr text-expr + +Where I<cmd-expr> is intended to match the name of one or more POD +commands, and I<text-expr> is intended to match the paragraph text for +the command. If a range-regex is supposed to match a POD command, then +the first character of the regex (the one after the initial '/') +absolutely I<must> be a single '=' character; it may not be anything +else (not even a regex meta-character) if it is supposed to match +against the name of a POD command. + +If no I<=cmd-expr> is given then the text-expr will be matched against +plain textblocks unless it is preceded by a space, in which case it is +matched against verbatim text-blocks. If no I<text-expr> is given then +only the command-portion of the paragraph is matched against. + +Note that these two expressions are each implicitly anchored. This +means that when matching against the command-name, there will be an +implicit '^' and '$' around the given I<=cmd-expr>; and when matching +against the paragraph text there will be an implicit '\A' and '\Z' +around the given I<text-expr>. + +Unlike with section-specs, the '!' character does I<not> have any special +meaning (negation or otherwise) at the beginning of a range-spec! + +Some example range specifications follow. + +=over 4 + +=item +Match all C<=for html> paragraphs: + +C</=for html/> + +=item +Match all paragraphs between C<=begin html> and C<=end html> +(note that this will I<not> work correctly if such sections +are nested): + +C</=begin html/../=end html/> + +=item +Match all paragraphs between the given C<=item> name until the end of the +current section: + +C</=item mine/../=head\d/> + +=item +Match all paragraphs between the given C<=item> until the next item, or +until the end of the itemized list (note that this will I<not> work as +desired if the item contains an itemized list nested within it): + +C</=item mine/../=(item|back)/> + +=back + +=end _NOT_IMPLEMENTED_ + +=cut + +############################################################################# + +#use diagnostics; +use Carp; +use Pod::Parser 1.04; + +@ISA = qw(Pod::Parser); +@EXPORT = qw(&podselect); + +## Maximum number of heading levels supported for '=headN' directives +*MAX_HEADING_LEVEL = \3; + +############################################################################# + +=head1 OBJECT METHODS + +The following methods are provided in this module. Each one takes a +reference to the object itself as an implicit first parameter. + +=cut + +##--------------------------------------------------------------------------- + +## =begin _PRIVATE_ +## +## =head1 B<_init_headings()> +## +## Initialize the current set of active section headings. +## +## =cut +## +## =end _PRIVATE_ + +sub _init_headings { + my $self = shift; + local *myData = $self; + + ## Initialize current section heading titles if necessary + unless (defined $myData{_SECTION_HEADINGS}) { + local *section_headings = $myData{_SECTION_HEADINGS} = []; + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + $section_headings[$i] = ''; + } + } +} + +##--------------------------------------------------------------------------- + +=head1 B<curr_headings()> + + ($head1, $head2, $head3, ...) = $parser->curr_headings(); + $head1 = $parser->curr_headings(1); + +This method returns a list of the currently active section headings and +subheadings in the document being parsed. The list of headings returned +corresponds to the most recently parsed paragraph of the input. + +If an argument is given, it must correspond to the desired section +heading number, in which case only the specified section heading is +returned. If there is no current section heading at the specified +level, then C<undef> is returned. + +=cut + +sub curr_headings { + my $self = shift; + $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS}); + my @headings = @{ $self->{_SECTION_HEADINGS} }; + return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings; +} + +##--------------------------------------------------------------------------- + +=head1 B<select()> + + $parser->select($section_spec1,$section_spec2,...); + +This method is used to select the particular sections and subsections of +POD documentation that are to be printed and/or processed. The existing +set of selected sections is I<replaced> with the given set of sections. +See B<add_selection()> for adding to the current set of selected +sections. + +Each of the C<$section_spec> arguments should be a section specification +as described in L<"SECTION SPECIFICATIONS">. The section specifications +are parsed by this method and the resulting regular expressions are +stored in the invoking object. + +If no C<$section_spec> arguments are given, then the existing set of +selected sections is cleared out (which means C<all> sections will be +processed). + +This method should I<not> normally be overridden by subclasses. + +=cut + +sub select { + my ($self, @sections) = @_; + local *myData = $self; + local $_; + +### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?) + + ##--------------------------------------------------------------------- + ## The following is a blatant hack for backward compatibility, and for + ## implementing add_selection(). If the *first* *argument* is the + ## string "+", then the remaining section specifications are *added* + ## to the current set of selections; otherwise the given section + ## specifications will *replace* the current set of selections. + ## + ## This should probably be fixed someday, but for the present time, + ## it seems incredibly unlikely that "+" would ever correspond to + ## a legitimate section heading + ##--------------------------------------------------------------------- + my $add = ($sections[0] eq '+') ? shift(@sections) : ''; + + ## Reset the set of sections to use + unless (@sections) { + delete $myData{_SELECTED_SECTIONS} unless ($add); + return; + } + $myData{_SELECTED_SECTIONS} = [] + unless ($add && exists $myData{_SELECTED_SECTIONS}); + local *selected_sections = $myData{_SELECTED_SECTIONS}; + + ## Compile each spec + for my $spec (@sections) { + if ( defined($_ = _compile_section_spec($spec)) ) { + ## Store them in our sections array + push(@selected_sections, $_); + } + else { + carp qq{Ignoring section spec "$spec"!\n}; + } + } +} + +##--------------------------------------------------------------------------- + +=head1 B<add_selection()> + + $parser->add_selection($section_spec1,$section_spec2,...); + +This method is used to add to the currently selected sections and +subsections of POD documentation that are to be printed and/or +processed. See <select()> for replacing the currently selected sections. + +Each of the C<$section_spec> arguments should be a section specification +as described in L<"SECTION SPECIFICATIONS">. The section specifications +are parsed by this method and the resulting regular expressions are +stored in the invoking object. + +This method should I<not> normally be overridden by subclasses. + +=cut + +sub add_selection { + my $self = shift; + return $self->select('+', @_); +} + +##--------------------------------------------------------------------------- + +=head1 B<clear_selections()> + + $parser->clear_selections(); + +This method takes no arguments, it has the exact same effect as invoking +<select()> with no arguments. + +=cut + +sub clear_selections { + my $self = shift; + return $self->select(); +} + +##--------------------------------------------------------------------------- + +=head1 B<match_section()> + + $boolean = $parser->match_section($heading1,$heading2,...); + +Returns a value of true if the given section and subsection heading +titles match any of the currently selected section specifications in +effect from prior calls to B<select()> and B<add_selection()> (or if +there are no explicitly selected/deselected sections). + +The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of +the corresponding sections, subsections, etc. to try and match. If +C<$headingN> is omitted then it defaults to the current corresponding +section heading title in the input. + +This method should I<not> normally be overridden by subclasses. + +=cut + +sub match_section { + my $self = shift; + my (@headings) = @_; + local *myData = $self; + + ## Return true if no restrictions were explicitly specified + my $selections = (exists $myData{_SELECTED_SECTIONS}) + ? $myData{_SELECTED_SECTIONS} : undef; + return 1 unless ((defined $selections) && @{$selections}); + + ## Default any unspecified sections to the current one + my @current_headings = $self->curr_headings(); + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + (defined $headings[$i]) or $headings[$i] = $current_headings[$i]; + } + + ## Look for a match against the specified section expressions + for my $section_spec ( @{$selections} ) { + ##------------------------------------------------------ + ## Each portion of this spec must match in order for + ## the spec to be matched. So we will start with a + ## match-value of 'true' and logically 'and' it with + ## the results of matching a given element of the spec. + ##------------------------------------------------------ + my $match = 1; + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + my $regex = $section_spec->[$i]; + my $negated = ($regex =~ s/^\!//); + $match &= ($negated ? ($headings[$i] !~ /${regex}/) + : ($headings[$i] =~ /${regex}/)); + last unless ($match); + } + return 1 if ($match); + } + return 0; ## no match +} + +##--------------------------------------------------------------------------- + +=head1 B<is_selected()> + + $boolean = $parser->is_selected($paragraph); + +This method is used to determine if the block of text given in +C<$paragraph> falls within the currently selected set of POD sections +and subsections to be printed or processed. This method is also +responsible for keeping track of the current input section and +subsections. It is assumed that C<$paragraph> is the most recently read +(but not yet processed) input paragraph. + +The value returned will be true if the C<$paragraph> and the rest of the +text in the same section as C<$paragraph> should be selected (included) +for processing; otherwise a false value is returned. + +=cut + +sub is_selected { + my ($self, $paragraph) = @_; + local $_; + local *myData = $self; + + $self->_init_headings() unless (defined $myData{_SECTION_HEADINGS}); + + ## Keep track of current sections levels and headings + $_ = $paragraph; + if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*?)\s*$/) + { + ## This is a section heading command + my ($level, $heading) = ($2, $3); + $level = 1 + (length($1) / 3) if ((! length $level) || (length $1)); + ## Reset the current section heading at this level + $myData{_SECTION_HEADINGS}->[$level - 1] = $heading; + ## Reset subsection headings of this one to empty + for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) { + $myData{_SECTION_HEADINGS}->[$i] = ''; + } + } + + return $self->match_section(); +} + +############################################################################# + +=head1 EXPORTED FUNCTIONS + +The following functions are exported by this module. Please note that +these are functions (not methods) and therefore C<do not> take an +implicit first argument. + +=cut + +##--------------------------------------------------------------------------- + +=head1 B<podselect()> + + podselect(\%options,@filelist); + +B<podselect> will print the raw (untranslated) POD paragraphs of all +POD sections in the given input files specified by C<@filelist> +according to the given options. + +If any argument to B<podselect> is a reference to a hash +(associative array) then the values with the following keys are +processed as follows: + +=over 4 + +=item B<-output> + +A string corresponding to the desired output file (or ">&STDOUT" +or ">&STDERR"). The default is to use standard output. + +=item B<-sections> + +A reference to an array of sections specifications (as described in +L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD +sections and subsections to be selected from input. If no section +specifications are given, then all sections of the PODs are used. + +=begin _NOT_IMPLEMENTED_ + +=item B<-ranges> + +A reference to an array of range specifications (as described in +L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD +paragraphs to be selected from the desired input sections. If no range +specifications are given, then all paragraphs of the desired sections +are used. + +=end _NOT_IMPLEMENTED_ + +=back + +All other arguments should correspond to the names of input files +containing POD sections. A file name of "-" or "<&STDIN" will +be interpreted to mean standard input (which is the default if no +filenames are given). + +=cut + +sub podselect { + my(@argv) = @_; + my %defaults = (); + my $pod_parser = new Pod::Select(%defaults); + my $num_inputs = 0; + my $output = '>&STDOUT'; + my %opts; + local $_; + for (@argv) { + if (ref($_)) { + next unless (ref($_) eq 'HASH'); + %opts = (%defaults, %{$_}); + + ##------------------------------------------------------------- + ## Need this for backward compatibility since we formerly used + ## options that were all uppercase words rather than ones that + ## looked like Unix command-line options. + ## to be uppercase keywords) + ##------------------------------------------------------------- + %opts = map { + my ($key, $val) = (lc $_, $opts{$_}); + $key =~ s/^(?=\w)/-/; + $key =~ /^-se[cl]/ and $key = '-sections'; + #! $key eq '-range' and $key .= 's'; + ($key => $val); + } (keys %opts); + + ## Process the options + (exists $opts{'-output'}) and $output = $opts{'-output'}; + + ## Select the desired sections + $pod_parser->select(@{ $opts{'-sections'} }) + if ( (defined $opts{'-sections'}) + && ((ref $opts{'-sections'}) eq 'ARRAY') ); + + #! ## Select the desired paragraph ranges + #! $pod_parser->select(@{ $opts{'-ranges'} }) + #! if ( (defined $opts{'-ranges'}) + #! && ((ref $opts{'-ranges'}) eq 'ARRAY') ); + } + else { + $pod_parser->parse_from_file($_, $output); + ++$num_inputs; + } + } + $pod_parser->parse_from_file('-') unless ($num_inputs > 0); +} + +############################################################################# + +=head1 PRIVATE METHODS AND DATA + +B<Pod::Select> makes uses a number of internal methods and data fields +which clients should not need to see or use. For the sake of avoiding +name collisions with client data and methods, these methods and fields +are briefly discussed here. Determined hackers may obtain further +information about them by reading the B<Pod::Select> source code. + +Private data fields are stored in the hash-object whose reference is +returned by the B<new()> constructor for this class. The names of all +private methods and data-fields used by B<Pod::Select> begin with a +prefix of "_" and match the regular expression C</^_\w+$/>. + +=cut + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head1 B<_compile_section_spec()> + + $listref = $parser->_compile_section_spec($section_spec); + +This function (note it is a function and I<not> a method) takes a +section specification (as described in L<"SECTION SPECIFICATIONS">) +given in C<$section_sepc>, and compiles it into a list of regular +expressions. If C<$section_spec> has no syntax errors, then a reference +to the list (array) of corresponding regular expressions is returned; +otherwise C<undef> is returned and an error message is printed (using +B<carp>) for each invalid regex. + +=end _PRIVATE_ + +=cut + +sub _compile_section_spec { + my ($section_spec) = @_; + my (@regexs, $negated); + + ## Compile the spec into a list of regexs + local $_ = $section_spec; + s{\\\\}{\001}g; ## handle escaped backward slashes + s{\\/}{\002}g; ## handle escaped forward slashes + + ## Parse the regexs for the heading titles + @regexs = split(/\//, $_, $MAX_HEADING_LEVEL); + + ## Set default regex for ommitted levels + for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { + $regexs[$i] = '.*' unless ((defined $regexs[$i]) + && (length $regexs[$i])); + } + ## Modify the regexs as needed and validate their syntax + my $bad_regexs = 0; + for (@regexs) { + $_ .= '.+' if ($_ eq '!'); + s{\001}{\\\\}g; ## restore escaped backward slashes + s{\002}{\\/}g; ## restore escaped forward slashes + $negated = s/^\!//; ## check for negation + eval "m{$_}"; ## check regex syntax + if ($@) { + ++$bad_regexs; + carp qq{Bad regular expression /$_/ in "$section_spec": $@\n}; + } + else { + ## Add the forward and rear anchors (and put the negator back) + $_ = '^' . $_ unless (/^\^/); + $_ = $_ . '$' unless (/\$$/); + $_ = '!' . $_ if ($negated); + } + } + return (! $bad_regexs) ? [ @regexs ] : undef; +} + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head2 $self->{_SECTION_HEADINGS} + +A reference to an array of the current section heading titles for each +heading level (note that the first heading level title is at index 0). + +=end _PRIVATE_ + +=cut + +##--------------------------------------------------------------------------- + +=begin _PRIVATE_ + +=head2 $self->{_SELECTED_SECTIONS} + +A reference to an array of references to arrays. Each subarray is a list +of anchored regular expressions (preceded by a "!" if the expression is to +be negated). The index of the expression in the subarray should correspond +to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}> +that it is to be matched against. + +=end _PRIVATE_ + +=cut + +############################################################################# + +=head1 SEE ALSO + +L<Pod::Parser> + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<pod2text> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=cut + +1; +# vim: ts=4 sw=4 et diff --git a/ext/Pod-Parser/lib/Pod/Usage.pm b/ext/Pod-Parser/lib/Pod/Usage.pm new file mode 100644 index 0000000000..f463fb9c46 --- /dev/null +++ b/ext/Pod-Parser/lib/Pod/Usage.pm @@ -0,0 +1,730 @@ +############################################################################# +# Pod/Usage.pm -- print usage messages for the running script. +# +# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::Usage; +use strict; + +use vars qw($VERSION @ISA @EXPORT); +$VERSION = '1.36'; ## Current version of this package +require 5.005; ## requires this Perl version or later + +=head1 NAME + +Pod::Usage, pod2usage() - print a usage message from embedded pod documentation + +=head1 SYNOPSIS + + use Pod::Usage + + my $message_text = "This text precedes the usage message."; + my $exit_status = 2; ## The exit status to use + my $verbose_level = 0; ## The verbose level to use + my $filehandle = \*STDERR; ## The filehandle to write to + + pod2usage($message_text); + + pod2usage($exit_status); + + pod2usage( { -message => $message_text , + -exitval => $exit_status , + -verbose => $verbose_level, + -output => $filehandle } ); + + pod2usage( -msg => $message_text , + -exitval => $exit_status , + -verbose => $verbose_level, + -output => $filehandle ); + + pod2usage( -verbose => 2, + -noperldoc => 1 ) + +=head1 ARGUMENTS + +B<pod2usage> should be given either a single argument, or a list of +arguments corresponding to an associative array (a "hash"). When a single +argument is given, it should correspond to exactly one of the following: + +=over 4 + +=item * + +A string containing the text of a message to print I<before> printing +the usage message + +=item * + +A numeric value corresponding to the desired exit status + +=item * + +A reference to a hash + +=back + +If more than one argument is given then the entire argument list is +assumed to be a hash. If a hash is supplied (either as a reference or +as a list) it should contain one or more elements with the following +keys: + +=over 4 + +=item C<-message> + +=item C<-msg> + +The text of a message to print immediately prior to printing the +program's usage message. + +=item C<-exitval> + +The desired exit status to pass to the B<exit()> function. +This should be an integer, or else the string "NOEXIT" to +indicate that control should simply be returned without +terminating the invoking process. + +=item C<-verbose> + +The desired level of "verboseness" to use when printing the usage +message. If the corresponding value is 0, then only the "SYNOPSIS" +section of the pod documentation is printed. If the corresponding value +is 1, then the "SYNOPSIS" section, along with any section entitled +"OPTIONS", "ARGUMENTS", or "OPTIONS AND ARGUMENTS" is printed. If the +corresponding value is 2 or more then the entire manpage is printed. + +The special verbosity level 99 requires to also specify the -sections +parameter; then these sections are extracted (see L<Pod::Select>) +and printed. + +=item C<-sections> + +A string representing a selection list for sections to be printed +when -verbose is set to 99, e.g. C<"NAME|SYNOPSIS|DESCRIPTION|VERSION">. + +Alternatively, an array reference of section specifications can be used: + + pod2usage(-verbose => 99, + -sections => [ qw(fred fred/subsection) ] ); + +=item C<-output> + +A reference to a filehandle, or the pathname of a file to which the +usage message should be written. The default is C<\*STDERR> unless the +exit value is less than 2 (in which case the default is C<\*STDOUT>). + +=item C<-input> + +A reference to a filehandle, or the pathname of a file from which the +invoking script's pod documentation should be read. It defaults to the +file indicated by C<$0> (C<$PROGRAM_NAME> for users of F<English.pm>). + +If you are calling B<pod2usage()> from a module and want to display +that module's POD, you can use this: + + use Pod::Find qw(pod_where); + pod2usage( -input => pod_where({-inc => 1}, __PACKAGE__) ); + +=item C<-pathlist> + +A list of directory paths. If the input file does not exist, then it +will be searched for in the given directory list (in the order the +directories appear in the list). It defaults to the list of directories +implied by C<$ENV{PATH}>. The list may be specified either by a reference +to an array, or by a string of directory paths which use the same path +separator as C<$ENV{PATH}> on your system (e.g., C<:> for Unix, C<;> for +MSWin32 and DOS). + +=item C<-noperldoc> + +By default, Pod::Usage will call L<perldoc> when -verbose >= 2 is +specified. This does not work well e.g. if the script was packed +with L<PAR>. The -noperldoc option suppresses the external call to +L<perldoc> and uses the simple text formatter (L<Pod::Text>) to +output the POD. + +=back + +=head1 DESCRIPTION + +B<pod2usage> will print a usage message for the invoking script (using +its embedded pod documentation) and then exit the script with the +desired exit status. The usage message printed may have any one of three +levels of "verboseness": If the verbose level is 0, then only a synopsis +is printed. If the verbose level is 1, then the synopsis is printed +along with a description (if present) of the command line options and +arguments. If the verbose level is 2, then the entire manual page is +printed. + +Unless they are explicitly specified, the default values for the exit +status, verbose level, and output stream to use are determined as +follows: + +=over 4 + +=item * + +If neither the exit status nor the verbose level is specified, then the +default is to use an exit status of 2 with a verbose level of 0. + +=item * + +If an exit status I<is> specified but the verbose level is I<not>, then the +verbose level will default to 1 if the exit status is less than 2 and +will default to 0 otherwise. + +=item * + +If an exit status is I<not> specified but verbose level I<is> given, then +the exit status will default to 2 if the verbose level is 0 and will +default to 1 otherwise. + +=item * + +If the exit status used is less than 2, then output is printed on +C<STDOUT>. Otherwise output is printed on C<STDERR>. + +=back + +Although the above may seem a bit confusing at first, it generally does +"the right thing" in most situations. This determination of the default +values to use is based upon the following typical Unix conventions: + +=over 4 + +=item * + +An exit status of 0 implies "success". For example, B<diff(1)> exits +with a status of 0 if the two files have the same contents. + +=item * + +An exit status of 1 implies possibly abnormal, but non-defective, program +termination. For example, B<grep(1)> exits with a status of 1 if +it did I<not> find a matching line for the given regular expression. + +=item * + +An exit status of 2 or more implies a fatal error. For example, B<ls(1)> +exits with a status of 2 if you specify an illegal (unknown) option on +the command line. + +=item * + +Usage messages issued as a result of bad command-line syntax should go +to C<STDERR>. However, usage messages issued due to an explicit request +to print usage (like specifying B<-help> on the command line) should go +to C<STDOUT>, just in case the user wants to pipe the output to a pager +(such as B<more(1)>). + +=item * + +If program usage has been explicitly requested by the user, it is often +desirable to exit with a status of 1 (as opposed to 0) after issuing +the user-requested usage message. It is also desirable to give a +more verbose description of program usage in this case. + +=back + +B<pod2usage> doesn't force the above conventions upon you, but it will +use them by default if you don't expressly tell it to do otherwise. The +ability of B<pod2usage()> to accept a single number or a string makes it +convenient to use as an innocent looking error message handling function: + + use Pod::Usage; + use Getopt::Long; + + ## Parse options + GetOptions("help", "man", "flag1") || pod2usage(2); + pod2usage(1) if ($opt_help); + pod2usage(-verbose => 2) if ($opt_man); + + ## Check for too many filenames + pod2usage("$0: Too many files given.\n") if (@ARGV > 1); + +Some user's however may feel that the above "economy of expression" is +not particularly readable nor consistent and may instead choose to do +something more like the following: + + use Pod::Usage; + use Getopt::Long; + + ## Parse options + GetOptions("help", "man", "flag1") || pod2usage(-verbose => 0); + pod2usage(-verbose => 1) if ($opt_help); + pod2usage(-verbose => 2) if ($opt_man); + + ## Check for too many filenames + pod2usage(-verbose => 2, -message => "$0: Too many files given.\n") + if (@ARGV > 1); + +As with all things in Perl, I<there's more than one way to do it>, and +B<pod2usage()> adheres to this philosophy. If you are interested in +seeing a number of different ways to invoke B<pod2usage> (although by no +means exhaustive), please refer to L<"EXAMPLES">. + +=head1 EXAMPLES + +Each of the following invocations of C<pod2usage()> will print just the +"SYNOPSIS" section to C<STDERR> and will exit with a status of 2: + + pod2usage(); + + pod2usage(2); + + pod2usage(-verbose => 0); + + pod2usage(-exitval => 2); + + pod2usage({-exitval => 2, -output => \*STDERR}); + + pod2usage({-verbose => 0, -output => \*STDERR}); + + pod2usage(-exitval => 2, -verbose => 0); + + pod2usage(-exitval => 2, -verbose => 0, -output => \*STDERR); + +Each of the following invocations of C<pod2usage()> will print a message +of "Syntax error." (followed by a newline) to C<STDERR>, immediately +followed by just the "SYNOPSIS" section (also printed to C<STDERR>) and +will exit with a status of 2: + + pod2usage("Syntax error."); + + pod2usage(-message => "Syntax error.", -verbose => 0); + + pod2usage(-msg => "Syntax error.", -exitval => 2); + + pod2usage({-msg => "Syntax error.", -exitval => 2, -output => \*STDERR}); + + pod2usage({-msg => "Syntax error.", -verbose => 0, -output => \*STDERR}); + + pod2usage(-msg => "Syntax error.", -exitval => 2, -verbose => 0); + + pod2usage(-message => "Syntax error.", + -exitval => 2, + -verbose => 0, + -output => \*STDERR); + +Each of the following invocations of C<pod2usage()> will print the +"SYNOPSIS" section and any "OPTIONS" and/or "ARGUMENTS" sections to +C<STDOUT> and will exit with a status of 1: + + pod2usage(1); + + pod2usage(-verbose => 1); + + pod2usage(-exitval => 1); + + pod2usage({-exitval => 1, -output => \*STDOUT}); + + pod2usage({-verbose => 1, -output => \*STDOUT}); + + pod2usage(-exitval => 1, -verbose => 1); + + pod2usage(-exitval => 1, -verbose => 1, -output => \*STDOUT}); + +Each of the following invocations of C<pod2usage()> will print the +entire manual page to C<STDOUT> and will exit with a status of 1: + + pod2usage(-verbose => 2); + + pod2usage({-verbose => 2, -output => \*STDOUT}); + + pod2usage(-exitval => 1, -verbose => 2); + + pod2usage({-exitval => 1, -verbose => 2, -output => \*STDOUT}); + +=head2 Recommended Use + +Most scripts should print some type of usage message to C<STDERR> when a +command line syntax error is detected. They should also provide an +option (usually C<-H> or C<-help>) to print a (possibly more verbose) +usage message to C<STDOUT>. Some scripts may even wish to go so far as to +provide a means of printing their complete documentation to C<STDOUT> +(perhaps by allowing a C<-man> option). The following complete example +uses B<Pod::Usage> in combination with B<Getopt::Long> to do all of these +things: + + use Getopt::Long; + use Pod::Usage; + + my $man = 0; + my $help = 0; + ## Parse options and print usage if there is a syntax error, + ## or if usage was explicitly requested. + GetOptions('help|?' => \$help, man => \$man) or pod2usage(2); + pod2usage(1) if $help; + pod2usage(-verbose => 2) if $man; + + ## If no arguments were given, then allow STDIN to be used only + ## if it's not connected to a terminal (otherwise print usage) + pod2usage("$0: No files given.") if ((@ARGV == 0) && (-t STDIN)); + __END__ + + =head1 NAME + + sample - Using GetOpt::Long and Pod::Usage + + =head1 SYNOPSIS + + sample [options] [file ...] + + Options: + -help brief help message + -man full documentation + + =head1 OPTIONS + + =over 8 + + =item B<-help> + + Print a brief help message and exits. + + =item B<-man> + + Prints the manual page and exits. + + =back + + =head1 DESCRIPTION + + B<This program> will read the given input file(s) and do something + useful with the contents thereof. + + =cut + +=head1 CAVEATS + +By default, B<pod2usage()> will use C<$0> as the path to the pod input +file. Unfortunately, not all systems on which Perl runs will set C<$0> +properly (although if C<$0> isn't found, B<pod2usage()> will search +C<$ENV{PATH}> or else the list specified by the C<-pathlist> option). +If this is the case for your system, you may need to explicitly specify +the path to the pod docs for the invoking script using something +similar to the following: + + pod2usage(-exitval => 2, -input => "/path/to/your/pod/docs"); + +In the pathological case that a script is called via a relative path +I<and> the script itself changes the current working directory +(see L<perlfunc/chdir>) I<before> calling pod2usage, Pod::Usage will +fail even on robust platforms. Don't do that. + +=head1 AUTHOR + +Please report bugs using L<http://rt.cpan.org>. + +Marek Rouchal E<lt>marekr@cpan.orgE<gt> + +Brad Appleton E<lt>bradapp@enteract.comE<gt> + +Based on code for B<Pod::Text::pod2text()> written by +Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> + +=head1 ACKNOWLEDGMENTS + +Steven McDougall E<lt>swmcd@world.std.comE<gt> for his help and patience +with re-writing this manpage. + +=head1 SEE ALSO + +L<Pod::Parser>, L<Getopt::Long>, L<Pod::Find> + +=cut + +############################################################################# + +#use diagnostics; +use Carp; +use Config; +use Exporter; +use File::Spec; + +@EXPORT = qw(&pod2usage); +BEGIN { + if ( $] >= 5.005_58 ) { + require Pod::Text; + @ISA = qw( Pod::Text ); + } + else { + require Pod::PlainText; + @ISA = qw( Pod::PlainText ); + } +} + +require Pod::Select; + +##--------------------------------------------------------------------------- + +##--------------------------------- +## Function definitions begin here +##--------------------------------- + +sub pod2usage { + local($_) = shift; + my %opts; + ## Collect arguments + if (@_ > 0) { + ## Too many arguments - assume that this is a hash and + ## the user forgot to pass a reference to it. + %opts = ($_, @_); + } + elsif (!defined $_) { + $_ = ''; + } + elsif (ref $_) { + ## User passed a ref to a hash + %opts = %{$_} if (ref($_) eq 'HASH'); + } + elsif (/^[-+]?\d+$/) { + ## User passed in the exit value to use + $opts{'-exitval'} = $_; + } + else { + ## User passed in a message to print before issuing usage. + $_ and $opts{'-message'} = $_; + } + + ## Need this for backward compatibility since we formerly used + ## options that were all uppercase words rather than ones that + ## looked like Unix command-line options. + ## to be uppercase keywords) + %opts = map { + my ($key, $val) = ($_, $opts{$_}); + $key =~ s/^(?=\w)/-/; + $key =~ /^-msg/i and $key = '-message'; + $key =~ /^-exit/i and $key = '-exitval'; + lc($key) => $val; + } (keys %opts); + + ## Now determine default -exitval and -verbose values to use + if ((! defined $opts{'-exitval'}) && (! defined $opts{'-verbose'})) { + $opts{'-exitval'} = 2; + $opts{'-verbose'} = 0; + } + elsif (! defined $opts{'-exitval'}) { + $opts{'-exitval'} = ($opts{'-verbose'} > 0) ? 1 : 2; + } + elsif (! defined $opts{'-verbose'}) { + $opts{'-verbose'} = (lc($opts{'-exitval'}) eq 'noexit' || + $opts{'-exitval'} < 2); + } + + ## Default the output file + $opts{'-output'} = (lc($opts{'-exitval'}) eq 'noexit' || + $opts{'-exitval'} < 2) ? \*STDOUT : \*STDERR + unless (defined $opts{'-output'}); + ## Default the input file + $opts{'-input'} = $0 unless (defined $opts{'-input'}); + + ## Look up input file in path if it doesnt exist. + unless ((ref $opts{'-input'}) || (-e $opts{'-input'})) { + my $basename = $opts{'-input'}; + my $pathsep = ($^O =~ /^(?:dos|os2|MSWin32)$/i) ? ';' + : (($^O eq 'MacOS' || $^O eq 'VMS') ? ',' : ':'); + my $pathspec = $opts{'-pathlist'} || $ENV{PATH} || $ENV{PERL5LIB}; + + my @paths = (ref $pathspec) ? @$pathspec : split($pathsep, $pathspec); + for my $dirname (@paths) { + $_ = File::Spec->catfile($dirname, $basename) if length; + last if (-e $_) && ($opts{'-input'} = $_); + } + } + + ## Now create a pod reader and constrain it to the desired sections. + my $parser = new Pod::Usage(USAGE_OPTIONS => \%opts); + if ($opts{'-verbose'} == 0) { + $parser->select('(?:SYNOPSIS|USAGE)\s*'); + } + elsif ($opts{'-verbose'} == 1) { + my $opt_re = '(?i)' . + '(?:OPTIONS|ARGUMENTS)' . + '(?:\s*(?:AND|\/)\s*(?:OPTIONS|ARGUMENTS))?'; + $parser->select( '(?:SYNOPSIS|USAGE)\s*', $opt_re, "DESCRIPTION/$opt_re" ); + } + elsif ($opts{'-verbose'} >= 2 && $opts{'-verbose'} != 99) { + $parser->select('.*'); + } + elsif ($opts{'-verbose'} == 99) { + my $sections = $opts{'-sections'}; + $parser->select( (ref $sections) ? @$sections : $sections ); + $opts{'-verbose'} = 1; + } + + ## Now translate the pod document and then exit with the desired status + if ( !$opts{'-noperldoc'} + and $opts{'-verbose'} >= 2 + and !ref($opts{'-input'}) + and $opts{'-output'} == \*STDOUT ) + { + ## spit out the entire PODs. Might as well invoke perldoc + my $progpath = File::Spec->catfile($Config{scriptdir}, 'perldoc'); + print { $opts{'-output'} } ($opts{'-message'}, "\n") if($opts{'-message'}); + if(defined $opts{-input} && $opts{-input} =~ /^\s*(\S.*?)\s*$/) { + # the perldocs back to 5.005 should all have -F + # without -F there are warnings in -T scripts + system($progpath, '-F', $1); + if($?) { + # RT16091: fall back to more if perldoc failed + system(($Config{pager} || $ENV{PAGER} || '/bin/more'), $1); + } + } else { + croak "Unspecified input file or insecure argument.\n"; + } + } + else { + $parser->parse_from_file($opts{'-input'}, $opts{'-output'}); + } + + exit($opts{'-exitval'}) unless (lc($opts{'-exitval'}) eq 'noexit'); +} + +##--------------------------------------------------------------------------- + +##------------------------------- +## Method definitions begin here +##------------------------------- + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + if ($self->can('initialize')) { + $self->initialize(); + } else { + $self = $self->SUPER::new(); + %$self = (%$self, %params); + } + return $self; +} + +sub select { + my ($self, @sections) = @_; + if ($ISA[0]->can('select')) { + $self->SUPER::select(@sections); + } else { + # we're using Pod::Simple - need to mimic the behavior of Pod::Select + my $add = ($sections[0] eq '+') ? shift(@sections) : ''; + ## Reset the set of sections to use + unless (@sections) { + delete $self->{USAGE_SELECT} unless ($add); + return; + } + $self->{USAGE_SELECT} = [] + unless ($add && $self->{USAGE_SELECT}); + my $sref = $self->{USAGE_SELECT}; + ## Compile each spec + for my $spec (@sections) { + my $cs = Pod::Select::_compile_section_spec($spec); + if ( defined $cs ) { + ## Store them in our sections array + push(@$sref, $cs); + } else { + carp qq{Ignoring section spec "$spec"!\n}; + } + } + } +} + +# Override Pod::Text->seq_i to return just "arg", not "*arg*". +sub seq_i { return $_[1] } + +# This overrides the Pod::Text method to do something very akin to what +# Pod::Select did as well as the work done below by preprocess_paragraph. +# Note that the below is very, very specific to Pod::Text. +sub _handle_element_end { + my ($self, $element) = @_; + if ($element eq 'head1') { + $self->{USAGE_HEADINGS} = [ $$self{PENDING}[-1][1] ]; + if ($self->{USAGE_OPTIONS}->{-verbose} < 2) { + $$self{PENDING}[-1][1] =~ s/^\s*SYNOPSIS\s*$/USAGE/; + } + } elsif ($element =~ /^head(\d+)$/ && $1) { # avoid 0 + my $idx = $1 - 1; + $self->{USAGE_HEADINGS} = [] unless($self->{USAGE_HEADINGS}); + $self->{USAGE_HEADINGS}->[$idx] = $$self{PENDING}[-1][1]; + } + if ($element =~ /^head\d+$/) { + $$self{USAGE_SKIPPING} = 1; + if (!$$self{USAGE_SELECT} || !@{ $$self{USAGE_SELECT} }) { + $$self{USAGE_SKIPPING} = 0; + } else { + my @headings = @{$$self{USAGE_HEADINGS}}; + for my $section_spec ( @{$$self{USAGE_SELECT}} ) { + my $match = 1; + for (my $i = 0; $i < $Pod::Select::MAX_HEADING_LEVEL; ++$i) { + $headings[$i] = '' unless defined $headings[$i]; + my $regex = $section_spec->[$i]; + my $negated = ($regex =~ s/^\!//); + $match &= ($negated ? ($headings[$i] !~ /${regex}/) + : ($headings[$i] =~ /${regex}/)); + last unless ($match); + } # end heading levels + if ($match) { + $$self{USAGE_SKIPPING} = 0; + last; + } + } # end sections + } + + # Try to do some lowercasing instead of all-caps in headings, and use + # a colon to end all headings. + if($self->{USAGE_OPTIONS}->{-verbose} < 2) { + local $_ = $$self{PENDING}[-1][1]; + s{([A-Z])([A-Z]+)}{((length($2) > 2) ? $1 : lc($1)) . lc($2)}ge; + s/\s*$/:/ unless (/:\s*$/); + $_ .= "\n"; + $$self{PENDING}[-1][1] = $_; + } + } + if ($$self{USAGE_SKIPPING} && $element !~ m/^over-/) { + pop @{ $$self{PENDING} }; + } else { + $self->SUPER::_handle_element_end($element); + } +} + +# required for Pod::Simple API +sub start_document { + my $self = shift; + $self->SUPER::start_document(); + my $msg = $self->{USAGE_OPTIONS}->{-message} or return 1; + my $out_fh = $self->output_fh(); + print $out_fh "$msg\n"; +} + +# required for old Pod::Parser API +sub begin_pod { + my $self = shift; + $self->SUPER::begin_pod(); ## Have to call superclass + my $msg = $self->{USAGE_OPTIONS}->{-message} or return 1; + my $out_fh = $self->output_handle(); + print $out_fh "$msg\n"; +} + +sub preprocess_paragraph { + my $self = shift; + local $_ = shift; + my $line = shift; + ## See if this is a heading and we arent printing the entire manpage. + if (($self->{USAGE_OPTIONS}->{-verbose} < 2) && /^=head/) { + ## Change the title of the SYNOPSIS section to USAGE + s/^=head1\s+SYNOPSIS\s*$/=head1 USAGE/; + ## Try to do some lowercasing instead of all-caps in headings + s{([A-Z])([A-Z]+)}{((length($2) > 2) ? $1 : lc($1)) . lc($2)}ge; + ## Use a colon to end all headings + s/\s*$/:/ unless (/:\s*$/); + $_ .= "\n"; + } + return $self->SUPER::preprocess_paragraph($_); +} + +1; # keep require happy diff --git a/ext/Pod-Parser/t/pod/contains_bad_pod.xr b/ext/Pod-Parser/t/pod/contains_bad_pod.xr new file mode 100644 index 0000000000..ad65663e22 --- /dev/null +++ b/ext/Pod-Parser/t/pod/contains_bad_pod.xr @@ -0,0 +1,5 @@ +=head foo + +bar baz. + +=cut diff --git a/ext/Pod-Parser/t/pod/contains_pod.t b/ext/Pod-Parser/t/pod/contains_pod.t new file mode 100644 index 0000000000..6326e72eb3 --- /dev/null +++ b/ext/Pod-Parser/t/pod/contains_pod.t @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +# Copyright (C) 2005 Joshua Hoblitt +# +# $Id$ + +use strict; + +use Test::More tests => 2; + +use Pod::Find qw( contains_pod ); + +{ + ok(contains_pod('t/pod/contains_pod.xr'), "contains pod"); +} + +{ + ok(contains_pod('t/pod/contains_bad_pod.xr'), "contains bad pod"); +} diff --git a/ext/Pod-Parser/t/pod/contains_pod.xr b/ext/Pod-Parser/t/pod/contains_pod.xr new file mode 100644 index 0000000000..7ea408de46 --- /dev/null +++ b/ext/Pod-Parser/t/pod/contains_pod.xr @@ -0,0 +1,5 @@ +=head1 foo + +bar baz. + +=cut diff --git a/ext/Pod-Parser/t/pod/emptycmd.t b/ext/Pod-Parser/t/pod/emptycmd.t new file mode 100644 index 0000000000..59e395ea04 --- /dev/null +++ b/ext/Pod-Parser/t/pod/emptycmd.t @@ -0,0 +1,21 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + +__END__ + +=pod + += this is a test +of the emergency +broadcast system + +=cut diff --git a/ext/Pod-Parser/t/pod/emptycmd.xr b/ext/Pod-Parser/t/pod/emptycmd.xr new file mode 100644 index 0000000000..f06d2dbb09 --- /dev/null +++ b/ext/Pod-Parser/t/pod/emptycmd.xr @@ -0,0 +1,2 @@ + = this is a test of the emergency broadcast system + diff --git a/ext/Pod-Parser/t/pod/find.t b/ext/Pod-Parser/t/pod/find.t new file mode 100644 index 0000000000..f6272d8b44 --- /dev/null +++ b/ext/Pod-Parser/t/pod/find.t @@ -0,0 +1,140 @@ +# Testing of Pod::Find +# Author: Marek Rouchal <marek@saftsack.fs.uni-bayreuth.de> + +$| = 1; + +use Test::More tests => 4; + +BEGIN { + # 1. load successful + use_ok('Pod::Find', qw(pod_find pod_where)); +} + +use File::Spec; + +require Cwd; +my $THISDIR = Cwd::cwd(); +my $VERBOSE = $ENV{PERL_CORE} ? 0 : ($ENV{TEST_VERBOSE} || 0); +my $lib_dir = File::Spec->catdir($THISDIR,'lib'); + +my $vms_unix_rpt = 0; +my $vms_efs = 0; +my $unix_mode = 1; + +if ($^O eq 'VMS') { + $lib_dir = VMS::Filespec::unixify(File::Spec->catdir($THISDIR,'-','lib','pod')); + $Qlib_dir = $lib_dir; + $Qlib_dir =~ s#\/#::#g; + + $unix_mode = 0; + if (eval 'require VMS::Feature') { + $vms_unix_rpt = VMS::Feature::current("filename_unix_report"); + $vms_efs = VMS::Feature::current("efs_charset"); + } else { + my $unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || ''; + my $efs_charset = $ENV{'DECC$EFS_CHARSET'} || ''; + $vms_unix_rpt = $unix_rpt =~ /^[ET1]/i; + $vms_efs = $efs_charset =~ /^[ET1]/i; + } + + # Traditional VMS mode only if VMS is not in UNIX compatible mode. + $unix_mode = ($vms_efs && $vms_unix_rpt); +} + +print "### 2. searching $lib_dir\n"; +my %pods = pod_find($lib_dir); +my $result = join(',', sort values %pods); +print "### found $result\n"; +my $compare = join(',', sort qw( + Pod::Checker + Pod::Find + Pod::InputObjects + Pod::ParseUtils + Pod::Parser + Pod::PlainText + Pod::Select + Pod::Usage +)); +if ($^O eq 'VMS') { + $compare = lc($compare); + my $undollared = $Qlib_dir; + $undollared =~ s/\$/\\\$/g; + $undollared =~ s/\-/\\\-/g; + $result =~ s/$undollared/pod::/g; + $result =~ s/\$//g; + my $count = 0; + my @result = split(/,/,$result); + my @compare = split(/,/,$compare); + foreach(@compare) { + $count += grep {/$_/} @result; + } + is($count/($#result+1)-1,$#compare); +} +elsif (File::Spec->case_tolerant || $^O eq 'dos') { + is(lc $result,lc $compare); +} +else { + is($result,$compare); +} + +print "### 3. searching for File::Find\n"; +$result = pod_where({ -inc => 1, -verbose => $VERBOSE }, 'File::Find') + || 'undef - pod not found!'; +print "### found $result\n"; + +require Config; +if ($^O eq 'VMS') { # privlib is perl_root:[lib] OK but not under mms + if ($unix_mode) { + $compare = "../lib/File/Find.pm"; + } else { + $compare = "lib.File]Find.pm"; + } + $result =~ s/perl_root:\[\-?\.?//i; + $result =~ s/\[\-?\.?//i; # needed under `mms test` + is($result,$compare); +} +else { + $compare = $ENV{PERL_CORE} ? + File::Spec->catfile(File::Spec->updir, File::Spec->updir, 'lib','File','Find.pm') + : File::Spec->catfile($Config::Config{privlibexp},"File","Find.pm"); + my $resfile = _canon($result); + my $cmpfile = _canon($compare); + if($^O =~ /dos|win32/i && $resfile =~ /~\d(?=\\|$)/) { + # we have ~1 short filenames + $resfile = quotemeta($resfile); + $resfile =~ s/\\~\d(?=\\|$)/[^\\\\]+/g; + ok($cmpfile =~ /^$resfile$/, "pod_where found File::Find (with long filename matching)") || + diag("'$cmpfile' does not match /^$resfile\$/"); + } else { + is($resfile,$cmpfile,"pod_where found File::Find"); + } +} + +# Search for a documentation pod rather than a module +my $searchpod = 'Stuff'; +print "### 4. searching for $searchpod.pod\n"; +$result = pod_where( + { -dirs => [ File::Spec->catdir( qw(t), 'pod', 'testpods', 'lib', 'Pod') ], + -verbose => $VERBOSE }, $searchpod) + || "undef - $searchpod.pod not found!"; +print "### found $result\n"; + +$compare = File::Spec->catfile( + qw(t), 'pod', 'testpods', 'lib', 'Pod' ,'Stuff.pm'); +is(_canon($result),_canon($compare)); + + +# make the path as generic as possible +sub _canon +{ + my ($path) = @_; + $path = File::Spec->canonpath($path); + my @comp = File::Spec->splitpath($path); + my @dir = File::Spec->splitdir($comp[1]); + $comp[1] = File::Spec->catdir(@dir); + $path = File::Spec->catpath(@comp); + $path = uc($path) if File::Spec->case_tolerant; + print "### general path: $path\n" if $VERBOSE; + $path; +} + diff --git a/ext/Pod-Parser/t/pod/for.t b/ext/Pod-Parser/t/pod/for.t new file mode 100644 index 0000000000..44af44f17d --- /dev/null +++ b/ext/Pod-Parser/t/pod/for.t @@ -0,0 +1,59 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +=pod + +This is a test + +=for theloveofpete +You shouldn't see this +or this +or this + +=for text +pod2text should see this +and this +and this + +and everything should see this! + +=begin text + +Similarly, this line ... + +and this one ... + +as well this one, + +should all be in pod2text output + +=end text + +Tweedley-deedley-dee, Im as happy as can be! +Tweedley-deedley-dum, cuz youre my honey sugar plum! + +=begin atthebeginning + +But I expect to see neither hide ... + +nor tail ... + +of this text + +=end atthebeginning + +The rest of this should show up in everything. + diff --git a/ext/Pod-Parser/t/pod/for.xr b/ext/Pod-Parser/t/pod/for.xr new file mode 100644 index 0000000000..5f6b8b2ce8 --- /dev/null +++ b/ext/Pod-Parser/t/pod/for.xr @@ -0,0 +1,21 @@ + This is a test + + pod2text should see this + and this + and this + + and everything should see this! + +Similarly, this line ... + +and this one ... + +as well this one, + +should all be in pod2text output + + Tweedley-deedley-dee, Im as happy as can be! Tweedley-deedley-dum, cuz + youre my honey sugar plum! + + The rest of this should show up in everything. + diff --git a/ext/Pod-Parser/t/pod/headings.t b/ext/Pod-Parser/t/pod/headings.t new file mode 100644 index 0000000000..78608d0fd9 --- /dev/null +++ b/ext/Pod-Parser/t/pod/headings.t @@ -0,0 +1,140 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +################################################################# + use Pod::Usage; + pod2usage( VERBOSE => 2, EXIT => 1 ); + +=pod + +=head1 NAME + +B<rdb2pg> - insert an rdb table into a PostgreSQL database + +=head1 SYNOPSIS + +B<rdb2pg> [I<param>=I<value> ...] + +=head1 PARAMETERS + +B<rdb2pg> uses an IRAF-compatible parameter interface. +A template parameter file is in F</proj/axaf/simul/lib/uparm/rdb2pg.par>. + +=over 4 + +=item B<input> I<file> + +The B<RDB> file to insert into the database. If the given name +is the string C<stdin>, it reads from the UNIX standard input stream. + + +=back + +=head1 DESCRIPTION + +B<rdb2pg> will enter the data from an B<RDB> database into a +PostgreSQL database table, optionally creating the database and the +table if they do not exist. It automatically determines the +PostgreSQL data type from the column definition in the B<RDB> file, +but may be overriden via a series of definition files or directly +via one of its parameters. + +The target database and table are specified by the C<db> and C<table> +parameters. If they do not exist, and the C<createdb> parameter is +set, they will be created. Table field definitions are determined +in the following order: + +=cut + +################################################################# + +results in: + + +################################################################# + + rdb2pg - insert an rdb table into a PostgreSQL database + + rdb2pg [*param*=*value* ...] + + rdb2pg uses an IRAF-compatible parameter interface. A template + parameter file is in /proj/axaf/simul/lib/uparm/rdb2pg.par. + + The RDB file to insert into the database. If the given name is + the string `stdin', it reads from the UNIX standard input + stream. + + rdb2pg will enter the data from an RDB database into a + PostgreSQL database table, optionally creating the database and + the table if they do not exist. It automatically determines the + PostgreSQL data type from the column definition in the RDB file, + but may be overriden via a series of definition files or + directly via one of its parameters. + + The target database and table are specified by the `db' and + `table' parameters. If they do not exist, and the `createdb' + parameter is set, they will be created. Table field definitions + are determined in the following order: + + +################################################################# + +while the original version of Text (using pod2text) gives + +################################################################# + +NAME + rdb2pg - insert an rdb table into a PostgreSQL database + +SYNOPSIS + rdb2pg [*param*=*value* ...] + +PARAMETERS + rdb2pg uses an IRAF-compatible parameter interface. A template + parameter file is in /proj/axaf/simul/lib/uparm/rdb2pg.par. + + input *file* + The RDB file to insert into the database. If the given name + is the string `stdin', it reads from the UNIX standard input + stream. + +DESCRIPTION + rdb2pg will enter the data from an RDB database into a + PostgreSQL database table, optionally creating the database and + the table if they do not exist. It automatically determines the + PostgreSQL data type from the column definition in the RDB file, + but may be overriden via a series of definition files or + directly via one of its parameters. + + The target database and table are specified by the `db' and + `table' parameters. If they do not exist, and the `createdb' + parameter is set, they will be created. Table field definitions + are determined in the following order: + + +################################################################# + + +Thanks for any help. If, as your email indicates, you've not much +time to look at this, I can work around things by calling pod2text() +directly using the official Text.pm. + +Diab + +------------- +Diab Jerius +djerius@cfa.harvard.edu + diff --git a/ext/Pod-Parser/t/pod/headings.xr b/ext/Pod-Parser/t/pod/headings.xr new file mode 100644 index 0000000000..fb37a2b0cf --- /dev/null +++ b/ext/Pod-Parser/t/pod/headings.xr @@ -0,0 +1,26 @@ +NAME + rdb2pg - insert an rdb table into a PostgreSQL database + +SYNOPSIS + rdb2pg [*param*=*value* ...] + +PARAMETERS + rdb2pg uses an IRAF-compatible parameter interface. A template parameter + file is in /proj/axaf/simul/lib/uparm/rdb2pg.par. + + input *file* + The RDB file to insert into the database. If the given name is the + string `stdin', it reads from the UNIX standard input stream. + +DESCRIPTION + rdb2pg will enter the data from an RDB database into a PostgreSQL + database table, optionally creating the database and the table if they + do not exist. It automatically determines the PostgreSQL data type from + the column definition in the RDB file, but may be overriden via a series + of definition files or directly via one of its parameters. + + The target database and table are specified by the `db' and `table' + parameters. If they do not exist, and the `createdb' parameter is set, + they will be created. Table field definitions are determined in the + following order: + diff --git a/ext/Pod-Parser/t/pod/include.t b/ext/Pod-Parser/t/pod/include.t new file mode 100644 index 0000000000..4e73b78356 --- /dev/null +++ b/ext/Pod-Parser/t/pod/include.t @@ -0,0 +1,36 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +=pod + +This file tries to demonstrate a simple =include directive +for pods. It is used as follows: + + =include filename + +where "filename" is expected to be an absolute pathname, or else +reside be relative to the directory in which the current processed +podfile resides, or be relative to the current directory. + +Lets try it out with the file "included.t" shall we. + +***THIS TEXT IS IMMEDIATELY BEFORE THE INCLUDE*** + +=include included.t + +***THIS TEXT IS IMMEDIATELY AFTER THE INCLUDE*** + +So how did we do??? diff --git a/ext/Pod-Parser/t/pod/include.xr b/ext/Pod-Parser/t/pod/include.xr new file mode 100644 index 0000000000..624ee44447 --- /dev/null +++ b/ext/Pod-Parser/t/pod/include.xr @@ -0,0 +1,22 @@ + This file tries to demonstrate a simple =include directive for pods. It + is used as follows: + + =include filename + + where "filename" is expected to be an absolute pathname, or else reside + be relative to the directory in which the current processed podfile + resides, or be relative to the current directory. + + Lets try it out with the file "included.t" shall we. + + ***THIS TEXT IS IMMEDIATELY BEFORE THE INCLUDE*** + +###### begin =include included.t ##### + This is the text of the included file named "included.t". It should + appear in the final pod document from pod2xxx + +###### end =include included.t ##### + ***THIS TEXT IS IMMEDIATELY AFTER THE INCLUDE*** + + So how did we do??? + diff --git a/ext/Pod-Parser/t/pod/included.t b/ext/Pod-Parser/t/pod/included.t new file mode 100644 index 0000000000..4f171c454b --- /dev/null +++ b/ext/Pod-Parser/t/pod/included.t @@ -0,0 +1,35 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +##------------------------------------------------------------ +# This file is =included by "include.t" +# +# This text should NOT be in the resultant pod document +# because we havent seen an =xxx pod directive in this file! +##------------------------------------------------------------ + +=pod + +This is the text of the included file named "included.t". +It should appear in the final pod document from pod2xxx + +=cut + +##------------------------------------------------------------ +# This text should NOT be in the resultant pod document +# because it is *after* an =cut an no other pod directives +# proceed it! +##------------------------------------------------------------ diff --git a/ext/Pod-Parser/t/pod/included.xr b/ext/Pod-Parser/t/pod/included.xr new file mode 100644 index 0000000000..54142fa0d3 --- /dev/null +++ b/ext/Pod-Parser/t/pod/included.xr @@ -0,0 +1,3 @@ + This is the text of the included file named "included.t". It should + appear in the final pod document from pod2xxx + diff --git a/ext/Pod-Parser/t/pod/lref.t b/ext/Pod-Parser/t/pod/lref.t new file mode 100644 index 0000000000..02e2c9e307 --- /dev/null +++ b/ext/Pod-Parser/t/pod/lref.t @@ -0,0 +1,66 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +=pod + +Try out I<LOTS> of different ways of specifying references: + +Reference the L<manpage/section> + +Reference the L<manpage / section> + +Reference the L<manpage/ section> + +Reference the L<manpage /section> + +Reference the L<"manpage/section"> + +Reference the L<"manpage"/section> + +Reference the L<manpage/"section"> + +Reference the L<manpage/ +section> + +Reference the L<manpage +/section> + +Now try it using the new "|" stuff ... + +Reference the L<thistext|manpage/section> + +Reference the L<thistext | manpage / section> + +Reference the L<thistext| manpage/ section> + +Reference the L<thistext |manpage /section> + +Reference the L<thistext| +"manpage/section"> + +Reference the L<thistext +|"manpage"/section> + +Reference the L<thistext|manpage/"section"> + +Reference the L<thistext| +manpage/ +section> + +Reference the L<thistext +|manpage +/section> + diff --git a/ext/Pod-Parser/t/pod/lref.xr b/ext/Pod-Parser/t/pod/lref.xr new file mode 100644 index 0000000000..297053b1ac --- /dev/null +++ b/ext/Pod-Parser/t/pod/lref.xr @@ -0,0 +1,40 @@ + Try out *LOTS* of different ways of specifying references: + + Reference the the section entry in the manpage manpage + + Reference the the section entry in the manpage manpage + + Reference the the section entry in the manpage manpage + + Reference the the section entry in the manpage manpage + + Reference the the section on "manpage/section" + + Reference the the section entry in the "manpage" manpage + + Reference the the section on "section" in the manpage manpage + + Reference the the section entry in the manpage manpage + + Reference the the section entry in the manpage manpage + + Now try it using the new "|" stuff ... + + Reference the thistext + + Reference the thistext + + Reference the thistext + + Reference the thistext + + Reference the thistext + + Reference the thistext + + Reference the thistext + + Reference the thistext + + Reference the thistext + diff --git a/ext/Pod-Parser/t/pod/multiline_items.t b/ext/Pod-Parser/t/pod/multiline_items.t new file mode 100644 index 0000000000..0fe410a4e6 --- /dev/null +++ b/ext/Pod-Parser/t/pod/multiline_items.t @@ -0,0 +1,31 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +=head1 Test multiline item lists + +This is a test to ensure that multiline =item paragraphs +get indented appropriately. + +=over 4 + +=item This +is +a +test. + +=back + +=cut diff --git a/ext/Pod-Parser/t/pod/multiline_items.xr b/ext/Pod-Parser/t/pod/multiline_items.xr new file mode 100644 index 0000000000..9eea63a8f0 --- /dev/null +++ b/ext/Pod-Parser/t/pod/multiline_items.xr @@ -0,0 +1,6 @@ +Test multiline item lists + This is a test to ensure that multiline =item paragraphs get indented + appropriately. + + This is a test. + diff --git a/ext/Pod-Parser/t/pod/nested_items.t b/ext/Pod-Parser/t/pod/nested_items.t new file mode 100644 index 0000000000..c8e9b22427 --- /dev/null +++ b/ext/Pod-Parser/t/pod/nested_items.t @@ -0,0 +1,64 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +=head1 Test nested item lists + +This is a test to ensure the nested =item paragraphs +get indented appropriately. + +=over 2 + +=item 1 + +First section. + +=over 2 + +=item a + +this is item a + +=item b + +this is item b + +=back + +=item 2 + +Second section. + +=over 2 + +=item a + +this is item a + +=item b + +this is item b + +=item c + +=item d + +This is item c & d. + +=back + +=back + +=cut diff --git a/ext/Pod-Parser/t/pod/nested_items.xr b/ext/Pod-Parser/t/pod/nested_items.xr new file mode 100644 index 0000000000..dd1adac127 --- /dev/null +++ b/ext/Pod-Parser/t/pod/nested_items.xr @@ -0,0 +1,19 @@ +Test nested item lists + This is a test to ensure the nested =item paragraphs get indented + appropriately. + + 1 First section. + + a this is item a + + b this is item b + + 2 Second section. + + a this is item a + + b this is item b + + c + d This is item c & d. + diff --git a/ext/Pod-Parser/t/pod/nested_seqs.t b/ext/Pod-Parser/t/pod/nested_seqs.t new file mode 100644 index 0000000000..8559f1f25f --- /dev/null +++ b/ext/Pod-Parser/t/pod/nested_seqs.t @@ -0,0 +1,23 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +=pod + +The statement: C<This is dog kind's I<finest> hour!> is a parody of a +quotation from Winston Churchill. + +=cut + diff --git a/ext/Pod-Parser/t/pod/nested_seqs.xr b/ext/Pod-Parser/t/pod/nested_seqs.xr new file mode 100644 index 0000000000..f981061f94 --- /dev/null +++ b/ext/Pod-Parser/t/pod/nested_seqs.xr @@ -0,0 +1,3 @@ + The statement: `This is dog kind's *finest* hour!' is a parody of a + quotation from Winston Churchill. + diff --git a/ext/Pod-Parser/t/pod/oneline_cmds.t b/ext/Pod-Parser/t/pod/oneline_cmds.t new file mode 100644 index 0000000000..28bd1d09e5 --- /dev/null +++ b/ext/Pod-Parser/t/pod/oneline_cmds.t @@ -0,0 +1,46 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +==head1 NAME +B<rdb2pg> - insert an rdb table into a PostgreSQL database + +==head1 SYNOPSIS +B<rdb2pg> [I<param>=I<value> ...] + +==head1 PARAMETERS +B<rdb2pg> uses an IRAF-compatible parameter interface. +A template parameter file is in F</proj/axaf/simul/lib/uparm/rdb2pg.par>. + +==over 4 +==item B<input> I<file> +The B<RDB> file to insert into the database. If the given name +is the string C<stdin>, it reads from the UNIX standard input stream. + +==back + +==head1 DESCRIPTION +B<rdb2pg> will enter the data from an B<RDB> database into a +PostgreSQL database table, optionally creating the database and the +table if they do not exist. It automatically determines the +PostgreSQL data type from the column definition in the B<RDB> file, +but may be overriden via a series of definition files or directly +via one of its parameters. + +The target database and table are specified by the C<db> and C<table> +parameters. If they do not exist, and the C<createdb> parameter is +set, they will be created. Table field definitions are determined +in the following order: + diff --git a/ext/Pod-Parser/t/pod/oneline_cmds.xr b/ext/Pod-Parser/t/pod/oneline_cmds.xr new file mode 100644 index 0000000000..fb37a2b0cf --- /dev/null +++ b/ext/Pod-Parser/t/pod/oneline_cmds.xr @@ -0,0 +1,26 @@ +NAME + rdb2pg - insert an rdb table into a PostgreSQL database + +SYNOPSIS + rdb2pg [*param*=*value* ...] + +PARAMETERS + rdb2pg uses an IRAF-compatible parameter interface. A template parameter + file is in /proj/axaf/simul/lib/uparm/rdb2pg.par. + + input *file* + The RDB file to insert into the database. If the given name is the + string `stdin', it reads from the UNIX standard input stream. + +DESCRIPTION + rdb2pg will enter the data from an RDB database into a PostgreSQL + database table, optionally creating the database and the table if they + do not exist. It automatically determines the PostgreSQL data type from + the column definition in the RDB file, but may be overriden via a series + of definition files or directly via one of its parameters. + + The target database and table are specified by the `db' and `table' + parameters. If they do not exist, and the `createdb' parameter is set, + they will be created. Table field definitions are determined in the + following order: + diff --git a/ext/Pod-Parser/t/pod/p2u_data.pl b/ext/Pod-Parser/t/pod/p2u_data.pl new file mode 100644 index 0000000000..ec0e3a7e50 --- /dev/null +++ b/ext/Pod-Parser/t/pod/p2u_data.pl @@ -0,0 +1,18 @@ +use Pod::Usage; +pod2usage(-verbose => 2, -exit => 17, -input => \*DATA); + +__DATA__ +=head1 NAME + +Test + +=head1 SYNOPSIS + +perl podusagetest.pl + +=head1 DESCRIPTION + +This is a test. + +=cut + diff --git a/ext/Pod-Parser/t/pod/pod2usage.t b/ext/Pod-Parser/t/pod/pod2usage.t new file mode 100644 index 0000000000..cf2c31b83f --- /dev/null +++ b/ext/Pod-Parser/t/pod/pod2usage.t @@ -0,0 +1,18 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + +=include pod2usage.PL + + diff --git a/ext/Pod-Parser/t/pod/pod2usage.xr b/ext/Pod-Parser/t/pod/pod2usage.xr new file mode 100644 index 0000000000..b7c3da563e --- /dev/null +++ b/ext/Pod-Parser/t/pod/pod2usage.xr @@ -0,0 +1,57 @@ +###### begin =include pod2usage.PL ##### +NAME + pod2usage - print usage messages from embedded pod docs in files + +SYNOPSIS + pod2usage [-help] [-man] [-exit *exitval*] [-output *outfile*] + [-verbose *level*] [-pathlist *dirlist*] *file* + +OPTIONS AND ARGUMENTS + -help Print a brief help message and exit. + + -man Print this command's manual page and exit. + + -exit *exitval* + The exit status value to return. + + -output *outfile* + The output file to print to. If the special names "-" or ">&1" + or ">&STDOUT" are used then standard output is used. If ">&2" or + ">&STDERR" is used then standard error is used. + + -verbose *level* + The desired level of verbosity to use: + + 1 : print SYNOPSIS only + 2 : print SYNOPSIS sections and any OPTIONS/ARGUMENTS sections + 3 : print the entire manpage (similar to running pod2text) + + -pathlist *dirlist* + Specifies one or more directories to search for the input file + if it was not supplied with an absolute path. Each directory + path in the given list should be separated by a ':' on Unix (';' + on MSWin32 and DOS). + + *file* The pathname of a file containing pod documentation to be output + in usage message format (defaults to standard input). + +DESCRIPTION + pod2usage will read the given input file looking for pod documentation + and will print the corresponding usage message. If no input file is + specified then standard input is read. + + pod2usage invokes the pod2usage() function in the Pod::Usage module. + Please see the pod2usage() entry in the Pod::Usage manpage. + +SEE ALSO + the Pod::Usage manpage, the pod2text(1) manpage + +AUTHOR + Please report bugs using http://rt.cpan.org. + + Brad Appleton <bradapp@enteract.com> + + Based on code for pod2text(1) written by Tom Christiansen + <tchrist@mox.perl.com> + +###### end =include pod2usage.PL ##### diff --git a/ext/Pod-Parser/t/pod/pod2usage2.t b/ext/Pod-Parser/t/pod/pod2usage2.t new file mode 100644 index 0000000000..80678a019f --- /dev/null +++ b/ext/Pod-Parser/t/pod/pod2usage2.t @@ -0,0 +1,357 @@ +#!/usr/bin/perl -w + +use Test::More; +use strict; + +BEGIN { + if ($^O eq 'MSWin32' || $^O eq 'VMS') { + plan skip_all => "Not portable on Win32 or VMS\n"; + } + else { + plan tests => 34; + } + use_ok ("Pod::Usage"); +} + +sub getoutput +{ + my ($code) = @_; + my $pid = open(TEST_IN, "-|"); + unless(defined $pid) { + die "Cannot fork: $!"; + } + if($pid) { + # parent + my @out = <TEST_IN>; + close(TEST_IN); + my $exit = $?>>8; + s/^/#/ for @out; + local $" = ""; + print "#EXIT=$exit OUTPUT=+++#@out#+++\n"; + return($exit, join("",@out)); + } + # child + open(STDERR, ">&STDOUT"); + Test::More->builder->no_ending(1); + &$code; + print "--NORMAL-RETURN--\n"; + exit 0; +} + +sub compare +{ + my ($left,$right) = @_; + $left =~ s/^#\s+/#/gm; + $right =~ s/^#\s+/#/gm; + $left =~ s/\s+/ /gm; + $right =~ s/\s+/ /gm; + $left eq $right; +} + +SKIP: { +if('Pod::Usage'->isa('Pod::Text') && $Pod::Text::VERSION < 2.18) { + skip("Formatting with Pod::Text $Pod::Text::VERSION not reliable", 33); +} + +my ($exit, $text) = getoutput( sub { pod2usage() } ); +is ($exit, 2, "Exit status pod2usage ()"); +ok (compare ($text, <<'EOT'), "Output test pod2usage ()"); +#Usage: +# frobnicate [ -r | --recursive ] [ -f | --force ] file ... +# +EOT + +($exit, $text) = getoutput( sub { pod2usage( + -message => 'You naughty person, what did you say?', + -verbose => 1 ) }); +is ($exit, 1, "Exit status pod2usage (-message => '...', -verbose => 1)"); +ok (compare ($text, <<'EOT'), "Output test pod2usage (-message => '...', -verbose => 1)") or diag("Got:\n$text\n"); +#You naughty person, what did you say? +# Usage: +# frobnicate [ -r | --recursive ] [ -f | --force ] file ... +# +# Options: +# -r | --recursive +# Run recursively. +# +# -f | --force +# Just do it! +# +# -n number +# Specify number of frobs, default is 42. +# +EOT + +($exit, $text) = getoutput( sub { pod2usage( + -verbose => 2, -exit => 42 ) } ); +is ($exit, 42, "Exit status pod2usage (-verbose => 2, -exit => 42)"); +ok (compare ($text, <<'EOT'), "Output test pod2usage (-verbose => 2, -exit => 42)"); +#NAME +# frobnicate - do what I mean +# +# SYNOPSIS +# frobnicate [ -r | --recursive ] [ -f | --force ] file ... +# +# DESCRIPTION +# frobnicate does foo and bar and what not. +# +# OPTIONS +# -r | --recursive +# Run recursively. +# +# -f | --force +# Just do it! +# +# -n number +# Specify number of frobs, default is 42. +# +EOT + +($exit, $text) = getoutput( sub { pod2usage(0) } ); +is ($exit, 0, "Exit status pod2usage (0)"); +ok (compare ($text, <<'EOT'), "Output test pod2usage (0)"); +#Usage: +# frobnicate [ -r | --recursive ] [ -f | --force ] file ... +# +# Options: +# -r | --recursive +# Run recursively. +# +# -f | --force +# Just do it! +# +# -n number +# Specify number of frobs, default is 42. +# +EOT + +($exit, $text) = getoutput( sub { pod2usage(42) } ); +is ($exit, 42, "Exit status pod2usage (42)"); +ok (compare ($text, <<'EOT'), "Output test pod2usage (42)"); +#Usage: +# frobnicate [ -r | --recursive ] [ -f | --force ] file ... +# +EOT + +($exit, $text) = getoutput( sub { pod2usage(-verbose => 0, -exit => 'NOEXIT') } ); +is ($exit, 0, "Exit status pod2usage (-verbose => 0, -exit => 'NOEXIT')"); +ok (compare ($text, <<'EOT'), "Output test pod2usage (-verbose => 0, -exit => 'NOEXIT')"); +#Usage: +# frobnicate [ -r | --recursive ] [ -f | --force ] file ... +# +# --NORMAL-RETURN-- +EOT + +($exit, $text) = getoutput( sub { pod2usage(-verbose => 99, -sections => 'DESCRIPTION') } ); +is ($exit, 1, "Exit status pod2usage (-verbose => 99, -sections => 'DESCRIPTION')"); +ok (compare ($text, <<'EOT'), "Output test pod2usage (-verbose => 99, -sections => 'DESCRIPTION')"); +#Description: +# frobnicate does foo and bar and what not. +# +EOT + +# does the __DATA__ work ok as input +my (@blib, $test_script, $pod_file1, , $pod_file2); +if (!$ENV{PERL_CORE}) { + @blib = '-Mblib'; +} +$test_script = File::Spec->catfile(qw(t pod p2u_data.pl)); +$pod_file1 = File::Spec->catfile(qw(t pod usage.pod)); +$pod_file2 = File::Spec->catfile(qw(t pod usage2.pod)); + + +($exit, $text) = getoutput( sub { system($^X, @blib, $test_script); exit($? >> 8); } ); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 17, "Exit status pod2usage (-verbose => 2, -input => \*DATA)"); +ok (compare ($text, <<'EOT'), "Output test pod2usage (-verbose => 2, -input => \*DATA)") or diag "Got:\n$text\n"; +#NAME +# Test +# +#SYNOPSIS +# perl podusagetest.pl +# +#DESCRIPTION +# This is a test. +# +EOT + +# test that SYNOPSIS and USAGE are printed +($exit, $text) = getoutput( sub { pod2usage(-input => $pod_file1, + -exitval => 0, -verbose => 0); }); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 0, "Exit status pod2usage with USAGE"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with USAGE") or diag "Got:\n$text\n"; +#Usage: +# This is a test for CPAN#33020 +# +#Usage: +# And this will be also printed. +# +EOT + +# test that SYNOPSIS and USAGE are printed with options +($exit, $text) = getoutput( sub { pod2usage(-input => $pod_file1, + -exitval => 0, -verbose => 1); }); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 0, "Exit status pod2usage with USAGE and verbose=1"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with USAGE and verbose=1") or diag "Got:\n$text\n"; +#Usage: +# This is a test for CPAN#33020 +# +#Usage: +# And this will be also printed. +# +#Options: +# And this with verbose == 1 +# +EOT + +# test that only USAGE is printed when requested +($exit, $text) = getoutput( sub { pod2usage(-input => $pod_file1, + -exitval => 0, -verbose => 99, -sections => 'USAGE'); }); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 0, "Exit status pod2usage with USAGE and verbose=99"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with USAGE and verbose=99") or diag "Got:\n$text\n"; +#Usage: +# This is a test for CPAN#33020 +# +EOT + +# test with pod_where +use_ok('Pod::Find', qw(pod_where)); + +($exit, $text) = getoutput( sub { pod2usage( -input => pod_where({-inc => 1}, 'Pod::Usage'), + -exitval => 0, -verbose => 0) } ); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 0, "Exit status pod2usage with Pod::Find"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with Pod::Find") or diag "Got:\n$text\n"; +#Usage: +# use Pod::Usage +# +# my $message_text = "This text precedes the usage message."; +# my $exit_status = 2; ## The exit status to use +# my $verbose_level = 0; ## The verbose level to use +# my $filehandle = \*STDERR; ## The filehandle to write to +# +# pod2usage($message_text); +# +# pod2usage($exit_status); +# +# pod2usage( { -message => $message_text , +# -exitval => $exit_status , +# -verbose => $verbose_level, +# -output => $filehandle } ); +# +# pod2usage( -msg => $message_text , +# -exitval => $exit_status , +# -verbose => $verbose_level, +# -output => $filehandle ); +# +# pod2usage( -verbose => 2, +# -noperldoc => 1 ) +# +EOT + +# verify that sections are correctly found after nested headings +($exit, $text) = getoutput( sub { pod2usage(-input => $pod_file2, + -exitval => 0, -verbose => 99, + -sections => [qw(BugHeader BugHeader/.*')]) }); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 0, "Exit status pod2usage with nested headings"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with nested headings") or diag "Got:\n$text\n"; +#BugHeader: +# Some text +# +# BugHeader2: +# More +# Still More +# +EOT + +# Verify that =over =back work OK +($exit, $text) = getoutput( sub { + pod2usage(-input => $pod_file2, + -exitval => 0, -verbose => 99, -sections => 'BugHeader/BugHeader2') } ); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 0, "Exit status pod2usage with over/back"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with over/back") or diag "Got:\n$text\n"; +# BugHeader2: +# More +# Still More +# +EOT + +# new array API for -sections +($exit, $text) = getoutput( sub { + pod2usage(-input => $pod_file2, + -exitval => 0, -verbose => 99, -sections => [qw(Heading-1/!.+ Heading-2/.+)]) } ); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +is ($exit, 0, "Exit status pod2usage with -sections => []"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with -sections => []") or diag "Got:\n$text\n"; +#Heading-1: +# One +# Two +# +# Heading-2.2: +# More text. +# +EOT + +# allow subheadings in OPTIONS and ARGUMENTS +($exit, $text) = getoutput( sub { + pod2usage(-input => $pod_file2, + -exitval => 0, -verbose => 1) } ); +$text =~ s{#Using.*/blib.*\n}{}; # older blib's emit something to STDERR +$text =~ s{[*](destination|files)[*]}{$1}g; # strip * chars +is ($exit, 0, "Exit status pod2usage with subheadings in OPTIONS"); +ok (compare ($text, <<'EOT'), "Output test pod2usage with subheadings in OPTIONS") or diag "Got:\n$text\n"; +#Options and Arguments: +# Arguments: +# The required arguments (which typically follow any options on the +# command line) are: +# +# destination +# files +# +# Options: +# Options may be abbreviated. Options which take values may be separated +# from the values by whitespace or the "=" character. +# +EOT +} # end SKIP + +__END__ + +=head1 NAME + +frobnicate - do what I mean + +=head1 SYNOPSIS + +B<frobnicate> S<[ B<-r> | B<--recursive> ]> S<[ B<-f> | B<--force> ]> + file ... + +=head1 DESCRIPTION + +B<frobnicate> does foo and bar and what not. + +=head1 OPTIONS + +=over 4 + +=item B<-r> | B<--recursive> + +Run recursively. + +=item B<-f> | B<--force> + +Just do it! + +=item B<-n> number + +Specify number of frobs, default is 42. + +=back + +=cut + diff --git a/ext/Pod-Parser/t/pod/podchkenc.t b/ext/Pod-Parser/t/pod/podchkenc.t new file mode 100644 index 0000000000..ccc2421a5a --- /dev/null +++ b/ext/Pod-Parser/t/pod/podchkenc.t @@ -0,0 +1,29 @@ +#!/usr/bin/perl +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testpchk.pl"; + import TestPodChecker; +} + +# this tests Pod::Checker accepts =encoding directive + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodchecker \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + +__END__ + +=encoding utf8 + +=encode utf8 + +dummy error + +=head1 An example. + +'Twas brillig, and the slithy toves did gyre and gimble in the wabe. + +=cut + diff --git a/ext/Pod-Parser/t/pod/podchkenc.xr b/ext/Pod-Parser/t/pod/podchkenc.xr new file mode 100644 index 0000000000..45ec573fa2 --- /dev/null +++ b/ext/Pod-Parser/t/pod/podchkenc.xr @@ -0,0 +1 @@ +*** ERROR: Unknown command 'encode' at line 20 in file t/pod/podchkenc.t diff --git a/ext/Pod-Parser/t/pod/poderrs.t b/ext/Pod-Parser/t/pod/poderrs.t new file mode 100644 index 0000000000..98c6320792 --- /dev/null +++ b/ext/Pod-Parser/t/pod/poderrs.t @@ -0,0 +1,209 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testpchk.pl"; + import TestPodChecker; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodchecker \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + +### Deliberately throw in some blank but non-empty lines + +### The above line should contain spaces + + +__END__ + +=head2 This should cause a warning + +=head1 NAME + +poderrors.t - test Pod::Checker on some pod syntax errors + +=unknown1 this is an unknown command with two N<unknownA> +and D<unknownB> interior sequences. + +This is some paragraph text with some unknown interior sequences, +such as Q<unknown2>, +A<unknown3>, +and Y<unknown4 V<unknown5>>. + +Now try some unterminated sequences like +I<hello mudda! +B<hello fadda! + +Here I am at C<camp granada! + +Camps is very, +entertaining. +And they say we'll have some fun if it stops raining! + +Okay, now use a non-empty blank line to terminate a paragraph and make +sure we get a warning. + +The above blank line contains tabs and spaces only + +=head1 Additional tests + +=head2 item without over + +=item oops + +=head2 back without over + +=back + +=head2 over without back + +=over 4 + +=item aaps + +=head2 end without begin + +=end + +=head2 begin and begin + +=begin html + +=begin text + +=end + +=end + +second one results in end w/o begin + +=head2 begin w/o formatter + +=begin + +=end + +=head2 for w/o formatter + +=for + +something... + +=head2 Nested sequences of the same type + +C<code I<italic C<code again!>>> + +=head2 Garbled entities + +E<alea iacta est> +E<C<auml>> +E<abcI<bla>> +E<0x100> +E<07777> +E<300> + +=head2 Unresolved internal links + +L</"begin or begin"> +L<"end with begin"> +L</OoPs> + +=head2 Some links with problems + +L<abc +def> +L<> +L< aha> +L<oho > +L<"Warnings"> this one is ok +L</unescaped> ok too, this POD has an X of the same name + +=head2 Warnings + +L<passwd(5)> +L<some text with / in it|perlvar/$|> should give warnings as hell + +=over 4 + +=item bla + +=back 200 + +the 200 is evil + +=begin html + +What? + +=end xml + +X<unescaped>see these unescaped < and > in the text? + +=head2 Misc + +Z<ddd> should be empty + +X<> should not be empty + +=over four + +This paragrapgh is misplaced - it ought to be an item. + +=item four should be numeric! + +=item + +=item blah + +=item previous is all empty!!! + +=back + +All empty over/back: + +=over 4 + +=back + +item w/o name + +=cut + +=pod bla + +bla is evil + +=cut blub + +blub is evil + +=head2 reoccurence + +=over 4 + +=item Misc + +we already have a head Misc + +=back + +=head2 some heading + +=head2 another one + +previous section is empty! + +=head1 LINK TESTS + +Due to bug reported by Rafael Garcia-Suarez "rgarciasuarez@free.fr": + +The following hyperlinks : +L<"I/O Operators"> +L<perlop/"I/O Operators"> +trigger a podchecker warning (using bleadperl) : + node 'I/O Operators' contains non-escaped | or / + +=cut + + diff --git a/ext/Pod-Parser/t/pod/poderrs.xr b/ext/Pod-Parser/t/pod/poderrs.xr new file mode 100644 index 0000000000..5b40d7a138 --- /dev/null +++ b/ext/Pod-Parser/t/pod/poderrs.xr @@ -0,0 +1,47 @@ +*** WARNING: =head2 without preceding higher level at line 20 in file t/pod/poderrs.t +*** WARNING: empty section in previous paragraph at line 22 in file t/pod/poderrs.t +*** ERROR: Unknown command 'unknown1' at line 26 in file t/pod/poderrs.t +*** ERROR: Unknown interior-sequence 'Q' at line 30 in file t/pod/poderrs.t +*** ERROR: Unknown interior-sequence 'A' at line 31 in file t/pod/poderrs.t +*** ERROR: Unknown interior-sequence 'Y' at line 32 in file t/pod/poderrs.t +*** ERROR: Unknown interior-sequence 'V' at line 32 in file t/pod/poderrs.t +*** ERROR: unterminated B<...> at line 36 in file t/pod/poderrs.t +*** ERROR: unterminated I<...> at line 35 in file t/pod/poderrs.t +*** ERROR: unterminated C<...> at line 38 in file t/pod/poderrs.t +*** WARNING: line containing nothing but whitespace in paragraph at line 46 in file t/pod/poderrs.t +*** ERROR: =item without previous =over at line 53 in file t/pod/poderrs.t +*** ERROR: =back without previous =over at line 57 in file t/pod/poderrs.t +*** ERROR: =over on line 61 without closing =back (at head2) at line 65 in file t/pod/poderrs.t +*** ERROR: =end without =begin at line 67 in file t/pod/poderrs.t +*** ERROR: Nested =begin's (first at line 71:html) at line 73 in file t/pod/poderrs.t +*** ERROR: =end without =begin at line 77 in file t/pod/poderrs.t +*** ERROR: No argument for =begin at line 83 in file t/pod/poderrs.t +*** ERROR: =for without formatter specification at line 89 in file t/pod/poderrs.t +*** WARNING: nested commands C<...C<...>...> at line 95 in file t/pod/poderrs.t +*** ERROR: garbled entity E<alea iacta est> at line 99 in file t/pod/poderrs.t +*** ERROR: garbled entity E<C<auml>> at line 100 in file t/pod/poderrs.t +*** ERROR: garbled entity E<abcI<bla>> at line 101 in file t/pod/poderrs.t +*** ERROR: Entity number out of range E<0x100> at line 102 in file t/pod/poderrs.t +*** ERROR: Entity number out of range E<07777> at line 103 in file t/pod/poderrs.t +*** ERROR: Entity number out of range E<300> at line 104 in file t/pod/poderrs.t +*** ERROR: malformed link L<> : empty link at line 116 in file t/pod/poderrs.t +*** WARNING: ignoring leading whitespace in link at line 117 in file t/pod/poderrs.t +*** WARNING: ignoring trailing whitespace in link at line 118 in file t/pod/poderrs.t +*** WARNING: (section) in 'passwd(5)' deprecated at line 124 in file t/pod/poderrs.t +*** WARNING: node '$|' contains non-escaped | or / at line 125 in file t/pod/poderrs.t +*** WARNING: alternative text '$|' contains non-escaped | or / at line 125 in file t/pod/poderrs.t +*** ERROR: Spurious character(s) after =back at line 131 in file t/pod/poderrs.t +*** ERROR: Nonempty Z<> at line 145 in file t/pod/poderrs.t +*** ERROR: Empty X<> at line 147 in file t/pod/poderrs.t +*** WARNING: preceding non-item paragraph(s) at line 153 in file t/pod/poderrs.t +*** WARNING: No argument for =item at line 155 in file t/pod/poderrs.t +*** WARNING: previous =item has no contents at line 157 in file t/pod/poderrs.t +*** WARNING: No items in =over (at line 165) / =back list at line 167 in file t/pod/poderrs.t +*** ERROR: Spurious text after =pod at line 173 in file t/pod/poderrs.t +*** ERROR: Spurious text after =cut at line 177 in file t/pod/poderrs.t +*** WARNING: empty section in previous paragraph at line 193 in file t/pod/poderrs.t +*** ERROR: unresolved internal link 'begin or begin' at line 108 in file t/pod/poderrs.t +*** ERROR: unresolved internal link 'end with begin' at line 109 in file t/pod/poderrs.t +*** ERROR: unresolved internal link 'OoPs' at line 110 in file t/pod/poderrs.t +*** ERROR: unresolved internal link 'abc def' at line 114 in file t/pod/poderrs.t +*** ERROR: unresolved internal link 'I/O Operators' at line 202 in file t/pod/poderrs.t diff --git a/ext/Pod-Parser/t/pod/podselect.t b/ext/Pod-Parser/t/pod/podselect.t new file mode 100644 index 0000000000..0004548cb7 --- /dev/null +++ b/ext/Pod-Parser/t/pod/podselect.t @@ -0,0 +1,18 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + +=include podselect.PL + + diff --git a/ext/Pod-Parser/t/pod/podselect.xr b/ext/Pod-Parser/t/pod/podselect.xr new file mode 100644 index 0000000000..c288e91280 --- /dev/null +++ b/ext/Pod-Parser/t/pod/podselect.xr @@ -0,0 +1,44 @@ +###### begin =include podselect.PL ##### +NAME + podselect - print selected sections of pod documentation on standard + output + +SYNOPSIS + podselect [-help] [-man] [-section *section-spec*] [*file* ...] + +OPTIONS AND ARGUMENTS + -help Print a brief help message and exit. + + -man Print the manual page and exit. + + -section *section-spec* + Specify a section to include in the output. See the section on + "SECTION SPECIFICATIONS" in the Pod::Parser manpage for the + format to use for *section-spec*. This option may be given + multiple times on the command line. + + *file* The pathname of a file from which to select sections of pod + documentation (defaults to standard input). + +DESCRIPTION + podselect will read the given input files looking for pod documentation + and will print out (in raw pod format) all sections that match one ore + more of the given section specifications. If no section specifications + are given than all pod sections encountered are output. + + podselect invokes the podselect() function exported by Pod::Select + Please see the podselect() entry in the Pod::Select manpage for more + details. + +SEE ALSO + the Pod::Parser manpage and the Pod::Select manpage + +AUTHOR + Please report bugs using http://rt.cpan.org. + + Brad Appleton <bradapp@enteract.com> + + Based on code for Pod::Text::pod2text(1) written by Tom Christiansen + <tchrist@mox.perl.com> + +###### end =include podselect.PL ##### diff --git a/ext/Pod-Parser/t/pod/special_seqs.t b/ext/Pod-Parser/t/pod/special_seqs.t new file mode 100644 index 0000000000..ecd99ecde8 --- /dev/null +++ b/ext/Pod-Parser/t/pod/special_seqs.t @@ -0,0 +1,46 @@ +BEGIN { + use File::Basename; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testp2pt.pl"; + import TestPodIncPlainText; +} + +my %options = map { $_ => 1 } @ARGV; ## convert cmdline to options-hash +my $passed = testpodplaintext \%options, $0; +exit( ($passed == 1) ? 0 : -1 ) unless $ENV{HARNESS_ACTIVE}; + + +__END__ + + +=pod + +This is a test to see if I can do not only C<$self> and C<method()>, but +also C<< $self->method() >> and C<< $self->{FIELDNAME} >> and +C<< $Foo <=> $Bar >> without resorting to escape sequences. If +I want to refer to the right-shift operator I can do something +like C<<< $x >> 3 >>> or even C<<<< $y >> 5 >>>>. + +Now for the grand finale of C<< $self->method()->{FIELDNAME} = {FOO=>BAR} >>. +And I also want to make sure that newlines work like this +C<<< +$self->{FOOBAR} >> 3 and [$b => $a]->[$a <=> $b] +>>> + +Of course I should still be able to do all this I<with> escape sequences +too: C<$self-E<gt>method()> and C<$self-E<gt>{FIELDNAME}> and C<{FOO=E<gt>BAR}>. + +Dont forget C<$self-E<gt>method()-E<gt>{FIELDNAME} = {FOO=E<gt>BAR}>. + +And make sure that C<0> works too! + +Now, if I use << or >> as my delimiters, then I have to use whitespace. +So things like C<<$self->method()>> and C<<$self->{FIELDNAME}>> wont end +up doing what you might expect since the first > will still terminate +the first < seen. + +Lets make sure these work for empty ones too, like C<< >> and C<< >> >> +(just to be obnoxious) + +=cut diff --git a/ext/Pod-Parser/t/pod/special_seqs.xr b/ext/Pod-Parser/t/pod/special_seqs.xr new file mode 100644 index 0000000000..a8c715ae0a --- /dev/null +++ b/ext/Pod-Parser/t/pod/special_seqs.xr @@ -0,0 +1,25 @@ + This is a test to see if I can do not only `$self' and `method()', but + also `$self->method()' and `$self->{FIELDNAME}' and `$Foo <=> $Bar' + without resorting to escape sequences. If I want to refer to the + right-shift operator I can do something like `$x >> 3' or even `$y >> + 5'. + + Now for the grand finale of `$self->method()->{FIELDNAME} = {FOO=>BAR}'. + And I also want to make sure that newlines work like this + `$self->{FOOBAR} >> 3 and [$b => $a]->[$a <=> $b]' + + Of course I should still be able to do all this *with* escape sequences + too: `$self->method()' and `$self->{FIELDNAME}' and `{FOO=>BAR}'. + + Dont forget `$self->method()->{FIELDNAME} = {FOO=>BAR}'. + + And make sure that `0' works too! + + Now, if I use << or >> as my delimiters, then I have to use whitespace. + So things like `<$self-'method()>> and `<$self-'{FIELDNAME}>> wont end + up doing what you might expect since the first > will still terminate + the first < seen. + + Lets make sure these work for empty ones too, like and `>>' (just to be + obnoxious) + diff --git a/ext/Pod-Parser/t/pod/testcmp.pl b/ext/Pod-Parser/t/pod/testcmp.pl new file mode 100644 index 0000000000..17f0b0b4c2 --- /dev/null +++ b/ext/Pod-Parser/t/pod/testcmp.pl @@ -0,0 +1,94 @@ +package TestCompare; + +use vars qw(@ISA @EXPORT $MYPKG); +#use strict; +#use diagnostics; +use Carp; +use Exporter; +use File::Basename; +use File::Spec; +use FileHandle; + +@ISA = qw(Exporter); +@EXPORT = qw(&testcmp); +$MYPKG = eval { (caller)[0] }; + +##-------------------------------------------------------------------------- + +=head1 NAME + +testcmp -- compare two files line-by-line + +=head1 SYNOPSIS + + $is_diff = testcmp($file1, $file2); + +or + + $is_diff = testcmp({-cmplines => \&mycmp}, $file1, $file2); + +=head2 DESCRIPTION + +Compare two text files line-by-line and return 0 if they are the +same, 1 if they differ. Each of $file1 and $file2 may be a filenames, +or a filehandles (in which case it must already be open for reading). + +If the first argument is a hashref, then the B<-cmplines> key in the +hash may have a subroutine reference as its corresponding value. +The referenced user-defined subroutine should be a line-comparator +function that takes two pre-chomped text-lines as its arguments +(the first is from $file1 and the second is from $file2). It should +return 0 if it considers the two lines equivalent, and non-zero +otherwise. + +=cut + +##-------------------------------------------------------------------------- + +sub testcmp( $ $ ; $) { + my %opts = ref($_[0]) eq 'HASH' ? %{shift()} : (); + my ($file1, $file2) = @_; + my ($fh1, $fh2) = ($file1, $file2); + unless (ref $fh1) { + $fh1 = FileHandle->new($file1, "r") or die "Can't open $file1: $!"; + } + unless (ref $fh2) { + $fh2 = FileHandle->new($file2, "r") or die "Can't open $file2: $!"; + } + + my $cmplines = $opts{'-cmplines'} || undef; + my ($f1text, $f2text) = ("", ""); + my ($line, $diffs) = (0, 0); + + while ( defined($f1text) and defined($f2text) ) { + defined($f1text = <$fh1>) and chomp($f1text); + defined($f2text = <$fh2>) and chomp($f2text); + ++$line; + last unless ( defined($f1text) and defined($f2text) ); + # kill any extra line endings + $f1text =~ s/[\r\n]+$//s; + $f2text =~ s/[\r\n]+$//s; + $diffs = (ref $cmplines) ? &$cmplines($f1text, $f2text) + : ($f1text ne $f2text); + last if $diffs; + } + close($fh1) unless (ref $file1); + close($fh2) unless (ref $file2); + + $diffs = 1 if (defined($f1text) or defined($f2text)); + if ( defined($f1text) and defined($f2text) ) { + ## these two lines must be different + warn "$file1 and $file2 differ at line $line\n"; + } + elsif (defined($f1text) and (! defined($f1text))) { + ## file1 must be shorter + warn "$file1 is shorter than $file2\n"; + } + elsif (defined $f2text) { + ## file2 must be longer + warn "$file1 is shorter than $file2\n"; + } + return $diffs; +} + +1; diff --git a/ext/Pod-Parser/t/pod/testp2pt.pl b/ext/Pod-Parser/t/pod/testp2pt.pl new file mode 100644 index 0000000000..308cd1ccd6 --- /dev/null +++ b/ext/Pod-Parser/t/pod/testp2pt.pl @@ -0,0 +1,192 @@ +package TestPodIncPlainText; + +BEGIN { + use File::Basename; + use File::Spec; + use Cwd qw(abs_path); + push @INC, '..'; + my $THISDIR = abs_path(dirname $0); + unshift @INC, $THISDIR; + require "testcmp.pl"; + import TestCompare; + my $PARENTDIR = dirname $THISDIR; + push @INC, map { File::Spec->catfile($_, 'lib') } ($PARENTDIR, $THISDIR); +} + +#use strict; +#use diagnostics; +use Carp; +use Exporter; +#use File::Compare; +#use Cwd qw(abs_path); + +use vars qw($MYPKG @EXPORT @ISA); +$MYPKG = eval { (caller)[0] }; +@EXPORT = qw(&testpodplaintext); +BEGIN { + require Pod::PlainText; + @ISA = qw( Pod::PlainText ); + require VMS::Filespec if $^O eq 'VMS'; +} + +## Hardcode settings for TERMCAP and COLUMNS so we can try to get +## reproducible results between environments +@ENV{qw(TERMCAP COLUMNS)} = ('co=76:do=^J', 76); + +sub catfile(@) { File::Spec->catfile(@_); } + +my $INSTDIR = abs_path(dirname $0); +$INSTDIR = VMS::Filespec::unixpath($INSTDIR) if $^O eq 'VMS'; +$INSTDIR =~ s#/$## if $^O eq 'VMS'; +$INSTDIR =~ s#:$## if $^O eq 'MacOS'; +$INSTDIR = (dirname $INSTDIR) if (basename($INSTDIR) eq 'pod'); +$INSTDIR =~ s#:$## if $^O eq 'MacOS'; +$INSTDIR = (dirname $INSTDIR) if (basename($INSTDIR) eq 't'); +my @PODINCDIRS = ( catfile($INSTDIR, 'lib', 'Pod'), + catfile($INSTDIR, 'scripts'), + catfile($INSTDIR, 'pod'), + catfile($INSTDIR, 't', 'pod') + ); + +# FIXME - we should make the core capable of finding utilities built in +# locations in ext. +push @PODINCDIRS, catfile((File::Spec->updir()) x 2, 'pod') if $ENV{PERL_CORE}; + +## Find the path to the file to =include +sub findinclude { + my $self = shift; + my $incname = shift; + + ## See if its already found w/out any "searching; + return $incname if (-r $incname); + + ## Need to search for it. Look in the following directories ... + ## 1. the directory containing this pod file + my $thispoddir = dirname $self->input_file; + ## 2. the parent directory of the above + my $parentdir = dirname $thispoddir; + my @podincdirs = ($thispoddir, $parentdir, @PODINCDIRS); + + for (@podincdirs) { + my $incfile = catfile($_, $incname); + return $incfile if (-r $incfile); + } + warn("*** Can't find =include file $incname in @podincdirs\n"); + return ""; +} + +sub command { + my $self = shift; + my ($cmd, $text, $line_num, $pod_para) = @_; + $cmd = '' unless (defined $cmd); + local $_ = $text || ''; + my $out_fh = $self->output_handle; + + ## Defer to the superclass for everything except '=include' + return $self->SUPER::command(@_) unless ($cmd eq "include"); + + ## We have an '=include' command + my $incdebug = 1; ## debugging + my @incargs = split; + if (@incargs == 0) { + warn("*** No filename given for '=include'\n"); + return; + } + my $incfile = $self->findinclude(shift @incargs) or return; + my $incbase = basename $incfile; + print $out_fh "###### begin =include $incbase #####\n" if ($incdebug); + $self->parse_from_file( {-cutting => 1}, $incfile ); + print $out_fh "###### end =include $incbase #####\n" if ($incdebug); +} + +sub begin_input { + $_[0]->{_INFILE} = VMS::Filespec::unixify($_[0]->{_INFILE}) if $^O eq 'VMS'; +} + +sub podinc2plaintext( $ $ ) { + my ($infile, $outfile) = @_; + local $_; + my $text_parser = $MYPKG->new; + $text_parser->parse_from_file($infile, $outfile); +} + +sub testpodinc2plaintext( @ ) { + my %args = @_; + my $infile = $args{'-In'} || croak "No input file given!"; + my $outfile = $args{'-Out'} || croak "No output file given!"; + my $cmpfile = $args{'-Cmp'} || croak "No compare-result file given!"; + + my $different = ''; + my $testname = basename $cmpfile, '.t', '.xr'; + + unless (-e $cmpfile) { + my $msg = "*** Can't find comparison file $cmpfile for testing $infile"; + warn "$msg\n"; + return $msg; + } + + print "# Running testpodinc2plaintext for '$testname'...\n"; + ## Compare the output against the expected result + podinc2plaintext($infile, $outfile); + if ( testcmp($outfile, $cmpfile) ) { + $different = "$outfile is different from $cmpfile"; + } + else { + unlink($outfile); + } + return $different; +} + +sub testpodplaintext( @ ) { + my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); + my @testpods = @_; + my ($testname, $testdir) = ("", ""); + my ($podfile, $cmpfile) = ("", ""); + my ($outfile, $errfile) = ("", ""); + my $passes = 0; + my $failed = 0; + local $_; + + print "1..", scalar @testpods, "\n" unless ($opts{'-xrgen'}); + + for $podfile (@testpods) { + ($testname, $_) = fileparse($podfile); + $testdir ||= $_; + $testname =~ s/\.t$//; + $cmpfile = $testdir . $testname . '.xr'; + $outfile = $testdir . $testname . '.OUT'; + + if ($opts{'-xrgen'}) { + if ($opts{'-force'} or ! -e $cmpfile) { + ## Create the comparison file + print "# Creating expected result for \"$testname\"" . + " pod2plaintext test ...\n"; + podinc2plaintext($podfile, $cmpfile); + } + else { + print "# File $cmpfile already exists" . + " (use '-force' to regenerate it).\n"; + } + next; + } + + my $failmsg = testpodinc2plaintext + -In => $podfile, + -Out => $outfile, + -Cmp => $cmpfile; + if ($failmsg) { + ++$failed; + print "#\tFAILED. ($failmsg)\n"; + print "not ok ", $failed+$passes, "\n"; + } + else { + ++$passes; + unlink($outfile); + print "#\tPASSED.\n"; + print "ok ", $failed+$passes, "\n"; + } + } + return $passes; +} + +1; diff --git a/ext/Pod-Parser/t/pod/testpchk.pl b/ext/Pod-Parser/t/pod/testpchk.pl new file mode 100644 index 0000000000..8aa10b94f8 --- /dev/null +++ b/ext/Pod-Parser/t/pod/testpchk.pl @@ -0,0 +1,129 @@ +package TestPodChecker; + +BEGIN { + use File::Basename; + use File::Spec; + push @INC, '..'; + my $THISDIR = dirname $0; + unshift @INC, $THISDIR; + require "testcmp.pl"; + import TestCompare; + my $PARENTDIR = dirname $THISDIR; + push @INC, map { File::Spec->catfile($_, 'lib') } ($PARENTDIR, $THISDIR); + require VMS::Filespec if $^O eq 'VMS'; +} + +use Pod::Checker; +use vars qw(@ISA @EXPORT $MYPKG); +#use strict; +#use diagnostics; +use Carp; +use Exporter; +#use File::Compare; + +@ISA = qw(Exporter); +@EXPORT = qw(&testpodchecker); +$MYPKG = eval { (caller)[0] }; + +sub stripname( $ ) { + local $_ = shift; + return /(\w[.\w]*)\s*$/ ? $1 : $_; +} + +sub msgcmp( $ $ ) { + ## filter out platform-dependent aspects of error messages + my ($line1, $line2) = @_; + for ($line1, $line2) { + ## remove filenames from error messages to avoid any + ## filepath naming differences between OS platforms + s/(at line \S+ in file) .*\W(\w+\.[tT])\s*$/$1 \L$2\E/; + s/.*\W(\w+\.[tT]) (has \d+ pod syntax error)/\L$1\E $2/; + } + return ($line1 ne $line2); +} + +sub testpodcheck( @ ) { + my %args = @_; + my $infile = $args{'-In'} || croak "No input file given!"; + my $outfile = $args{'-Out'} || croak "No output file given!"; + my $cmpfile = $args{'-Cmp'} || croak "No compare-result file given!"; + + my $different = ''; + my $testname = basename $cmpfile, '.t', '.xr'; + + unless (-e $cmpfile) { + my $msg = "*** Can't find comparison file $cmpfile for testing $infile"; + warn "$msg\n"; + return $msg; + } + + print "# Running podchecker for '$testname'...\n"; + ## Compare the output against the expected result + if ($^O eq 'VMS') { + for ($infile, $outfile, $cmpfile) { + $_ = VMS::Filespec::unixify($_) unless ref; + } + } + podchecker($infile, $outfile); + if ( testcmp({'-cmplines' => \&msgcmp}, $outfile, $cmpfile) ) { + $different = "$outfile is different from $cmpfile"; + } + else { + unlink($outfile); + } + return $different; +} + +sub testpodchecker( @ ) { + my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); + my @testpods = @_; + my ($testname, $testdir) = ("", ""); + my ($podfile, $cmpfile) = ("", ""); + my ($outfile, $errfile) = ("", ""); + my $passes = 0; + my $failed = 0; + local $_; + + print "1..", scalar @testpods, "\n" unless ($opts{'-xrgen'}); + + for $podfile (@testpods) { + ($testname, $_) = fileparse($podfile); + $testdir ||= $_; + $testname =~ s/\.t$//; + $cmpfile = $testdir . $testname . '.xr'; + $outfile = $testdir . $testname . '.OUT'; + + if ($opts{'-xrgen'}) { + if ($opts{'-force'} or ! -e $cmpfile) { + ## Create the comparison file + print "# Creating expected result for \"$testname\"" . + " podchecker test ...\n"; + podchecker($podfile, $cmpfile); + } + else { + print "# File $cmpfile already exists" . + " (use '-force' to regenerate it).\n"; + } + next; + } + + my $failmsg = testpodcheck + -In => $podfile, + -Out => $outfile, + -Cmp => $cmpfile; + if ($failmsg) { + ++$failed; + print "#\tFAILED. ($failmsg)\n"; + print "not ok ", $failed+$passes, "\n"; + } + else { + ++$passes; + unlink($outfile); + print "#\tPASSED.\n"; + print "ok ", $failed+$passes, "\n"; + } + } + return $passes; +} + +1; diff --git a/ext/Pod-Parser/t/pod/testpods/lib/Pod/Stuff.pm b/ext/Pod-Parser/t/pod/testpods/lib/Pod/Stuff.pm new file mode 100644 index 0000000000..d5c1120303 --- /dev/null +++ b/ext/Pod-Parser/t/pod/testpods/lib/Pod/Stuff.pm @@ -0,0 +1,20 @@ +=head1 NAME + +Pod::Stuff - dummy testing pod + +=head1 DESCRIPTION + +This isn't really anything, its just some dummy pod code. +And stuff. + +Lots of stuff. + +=head2 STUFF + +For all your stuff [tm] + +Stuffit + +Mmmm, stuffed pizza bread. + +=cut diff --git a/ext/Pod-Parser/t/pod/twice.t b/ext/Pod-Parser/t/pod/twice.t new file mode 100644 index 0000000000..098bc3c628 --- /dev/null +++ b/ext/Pod-Parser/t/pod/twice.t @@ -0,0 +1,36 @@ +use strict; +use Test; +use File::Spec; + +BEGIN { plan tests => 1 } + +use Pod::Parser; +use Carp; +$SIG{__DIE__} = \&Carp::confess; + +eval {require IO::String;}; +skip($@ ? 'no IO::String' : '', sub { + { + my $pod_string = 'some I<silly> text'; + my $handle = IO::String->new( \$pod_string ); + my $parser = Pod::Parser->new(); + $parser->parse_from_file( $0, $handle ); + } + # free the reference + { + my $parser = Pod::Parser->new(); + $parser->parse_from_file( $0, File::Spec->devnull ); + } + 1; +}); + +exit 0; + +__END__ + +=head1 EXAMPLE + +This test makes sure the parse_from_file is re-entrant + +=cut + diff --git a/ext/Pod-Parser/t/pod/usage.pod b/ext/Pod-Parser/t/pod/usage.pod new file mode 100644 index 0000000000..c81cc82c51 --- /dev/null +++ b/ext/Pod-Parser/t/pod/usage.pod @@ -0,0 +1,18 @@ +=head1 NAME + +usage.pod - example for testing USAGE and SYNOPSIS + +=head1 USAGE + +This is a test for CPAN#33020 + +=head1 SYNOPSIS + +And this will be also printed. + +=head1 OPTIONS + +And this with verbose == 1 + +=cut + diff --git a/ext/Pod-Parser/t/pod/usage2.pod b/ext/Pod-Parser/t/pod/usage2.pod new file mode 100644 index 0000000000..5c4817b8b1 --- /dev/null +++ b/ext/Pod-Parser/t/pod/usage2.pod @@ -0,0 +1,56 @@ +=head1 Heading-1 + +=over 100 + +=item One + +=item Two + +=back + +=head2 Heading 2 + +Some text + +=head1 BugHeader + +Some text + +=head2 BugHeader2 + +=over 4 + +=item More + +=item Still More + +=back + +=head1 Heading-2 + +=head2 Heading-2.2 + +More text. + +=head1 OPTIONS AND ARGUMENTS + +=head2 Arguments + +The required arguments (which typically follow any options on the +command line) are: + +=over + +=item I<destination> + +=item I<files> + +=back + +=head2 Options + +Options may be abbreviated. Options which take values may be separated +from the values by whitespace or the "=" character. + +=cut + |