From 891c29af147fcbe6c4dd5d8ffbbb426665d4b558 Mon Sep 17 00:00:00 2001 From: Lorry Date: Mon, 21 May 2012 16:44:15 +0100 Subject: Tarball conversion --- Parser/Encodings/Japanese_Encodings.msg | 117 ++++++++++++++++++++ Parser/Encodings/README | 51 +++++++++ Parser/Encodings/big5.enc | Bin 0 -> 40706 bytes Parser/Encodings/euc-kr.enc | Bin 0 -> 45802 bytes Parser/Encodings/ibm866.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-2.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-3.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-4.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-5.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-7.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-8.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-9.enc | Bin 0 -> 1072 bytes Parser/Encodings/koi8-r.enc | Bin 0 -> 1072 bytes Parser/Encodings/windows-1250.enc | Bin 0 -> 1072 bytes Parser/Encodings/windows-1251.enc | Bin 0 -> 1072 bytes Parser/Encodings/windows-1252.enc | Bin 0 -> 1072 bytes Parser/Encodings/windows-1255.enc | Bin 0 -> 1072 bytes Parser/Encodings/x-euc-jp-jisx0221.enc | Bin 0 -> 37890 bytes Parser/Encodings/x-euc-jp-unicode.enc | Bin 0 -> 37890 bytes Parser/Encodings/x-sjis-cp932.enc | Bin 0 -> 20368 bytes Parser/Encodings/x-sjis-jdk117.enc | Bin 0 -> 18202 bytes Parser/Encodings/x-sjis-jisx0221.enc | Bin 0 -> 18202 bytes Parser/Encodings/x-sjis-unicode.enc | Bin 0 -> 18202 bytes Parser/LWPExternEnt.pl | 71 ++++++++++++ Parser/Style/Debug.pm | 52 +++++++++ Parser/Style/Objects.pm | 78 ++++++++++++++ Parser/Style/Stream.pm | 184 ++++++++++++++++++++++++++++++++ Parser/Style/Subs.pm | 58 ++++++++++ Parser/Style/Tree.pm | 90 ++++++++++++++++ 29 files changed, 701 insertions(+) create mode 100644 Parser/Encodings/Japanese_Encodings.msg create mode 100644 Parser/Encodings/README create mode 100644 Parser/Encodings/big5.enc create mode 100644 Parser/Encodings/euc-kr.enc create mode 100644 Parser/Encodings/ibm866.enc create mode 100644 
Parser/Encodings/iso-8859-2.enc create mode 100644 Parser/Encodings/iso-8859-3.enc create mode 100644 Parser/Encodings/iso-8859-4.enc create mode 100644 Parser/Encodings/iso-8859-5.enc create mode 100644 Parser/Encodings/iso-8859-7.enc create mode 100644 Parser/Encodings/iso-8859-8.enc create mode 100644 Parser/Encodings/iso-8859-9.enc create mode 100644 Parser/Encodings/koi8-r.enc create mode 100644 Parser/Encodings/windows-1250.enc create mode 100644 Parser/Encodings/windows-1251.enc create mode 100644 Parser/Encodings/windows-1252.enc create mode 100644 Parser/Encodings/windows-1255.enc create mode 100644 Parser/Encodings/x-euc-jp-jisx0221.enc create mode 100644 Parser/Encodings/x-euc-jp-unicode.enc create mode 100644 Parser/Encodings/x-sjis-cp932.enc create mode 100644 Parser/Encodings/x-sjis-jdk117.enc create mode 100644 Parser/Encodings/x-sjis-jisx0221.enc create mode 100644 Parser/Encodings/x-sjis-unicode.enc create mode 100644 Parser/LWPExternEnt.pl create mode 100644 Parser/Style/Debug.pm create mode 100644 Parser/Style/Objects.pm create mode 100644 Parser/Style/Stream.pm create mode 100644 Parser/Style/Subs.pm create mode 100644 Parser/Style/Tree.pm (limited to 'Parser') diff --git a/Parser/Encodings/Japanese_Encodings.msg b/Parser/Encodings/Japanese_Encodings.msg new file mode 100644 index 0000000..6912e70 --- /dev/null +++ b/Parser/Encodings/Japanese_Encodings.msg @@ -0,0 +1,117 @@ +Mapping files for Japanese encodings + +1998 12/25 + +Fuji Xerox Information Systems +MURATA Makoto + +1. Overview + +This version of XML::Parser and XML::Encoding does not come with map files for +the charset "Shift_JIS" and the charset "euc-jp". Unfortunately, each of these +charsets has more than one mapping. None of these mappings are +considered as authoritative. + +Therefore, we have come to believe that it is dangerous to provide map files +for these charsets. Rather, we introduce several private charsets and map +files for these private charsets. 
If IANA, Unicode Consortium, and JIS +eventually reach a consensus, we will be able to provide map files for +"Shift_JIS" and "euc-jp". + +2. Different mappings from existing charsets to Unicode + +1) Different mappings in JIS X0221 and Unicode + +The mapping between JIS X0208:1990 and Unicode 1.1 and the mapping +between JIS X0212:1990 and Unicode 1.1 are published by the Unicode +Consortium. (They are available at +ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0208.TXT and +ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0212.TXT, +respectively.) These mapping files have a note as below: + +# The kanji mappings are a normative part of ISO/IEC 10646. The +# non-kanji mappings are provisional, pending definition of +# official mappings by Japanese standards bodies. + +Unfortunately, the non-kanji mappings in the Japanese standard for ISO 10646/1, +namely JIS X 0221:1995, are different from the Unicode Consortium mapping since +0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather than U+2015 +(horizontal bar). Furthermore, JIS X 0221 clearly says that the mapping is +informational and non-normative. As a result, some companies (e.g., Microsoft and +Apple) have introduced slightly different mappings. Therefore, neither the +Unicode Consortium mapping nor the JIS X 0221 mapping is considered +authoritative. + +2) Shift-JIS + +This charset is especially problematic, since its definition has been unclear +since its inception. + +The current registration of the charset "Shift_JIS" is as below: + +>Name: Shift_JIS (preferred MIME name) +>MIBenum: 17 +>Source: A Microsoft code that extends csHalfWidthKatakana to include +> kanji by adding a second byte when the value of the first +> byte is in the ranges 81-9F or E0-EF. +>Alias: MS_Kanji +>Alias: csShiftJIS + +First, this does not refer to the mapping "Shift-JIS to Unicode" +published by the Unicode Consortium (available at +ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/SHIFTJIS.TXT). 
+ +Second, "kanji" in this registration can be interpreted in different ways. +Does this "kanji" refer to JIS X0208:1978, JIS X0208:1983, or JIS +X0208:1990(== JIS X0208:1997)? These three standards are *incompatible* with +each other. Moreover, we can even argue that "kanji" refers to JIS X0212 or +ideographic characters in other countries. + +Third, each company has extended Shift JIS. For example, Microsoft introduced +OEM extensions (NEC extensions and IBM extensions). + +Fourth, Shift JIS uses JIS X0201, which is almost upper-compatible with US-ASCII +but is not quite. 5C and 7E of JIS X 0201 are different from backslash and +tilde, respectively. However, many programming languages (e.g., Java) +ignore this difference and assume that 5C and 7E of Shift JIS are backslash +and tilde. + + +3. Proposed charsets and mappings + +As a tentative solution, we introduce two private charsets for EUC-JP and four +private charsets for Shift JIS. + +1) EUC-JP + +We have two charsets, namely "x-eucjp-unicode" and "x-eucjp-jisx0221". Their +difference is only one code point. The mapping for the former is based +on the Unicode Consortium mapping, while the latter is based on the JIS X0221 +mapping. + +2) Shift JIS + +We have four charsets, namely x-sjis-unicode, x-sjis-jisx0221, +x-sjis-jdk117, and x-sjis-cp932. + +The mapping for the charset x-sjis-unicode is the one published by the Unicode +Consortium. The mapping for x-sjis-jisx0221 is almost equivalent to +x-sjis-unicode, but 0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather +than U+2015. The charset x-sjis-jdk117 is again almost equivalent to +x-sjis-unicode, but 0x5C and 0x7E of JIS X0201 are mapped to backslash and +tilde. + +The charset x-sjis-cp932 is used by Microsoft Windows, and its mapping is +published by the Unicode Consortium (available at: +ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.txt). 
The +coded character set for this charset includes NEC-extensions and +IBM-extensions. 0x5C and 0x7E of JIS X0201 are mapped to backslash and tilde; +0x213D is mapped to U+2015; and 0x2140, 0x2141, 0x2142, and 0x215E of JIS X +0208 are mapped to compatibility characters. + +Makoto + +Fuji Xerox Information Systems + +Tel: +81-44-812-7230 Fax: +81-44-812-7231 +E-mail: murata@apsdc.ksp.fujixerox.co.jp diff --git a/Parser/Encodings/README b/Parser/Encodings/README new file mode 100644 index 0000000..f2e69f9 --- /dev/null +++ b/Parser/Encodings/README @@ -0,0 +1,51 @@ +This directory contains binary encoding maps for some selected encodings. +If they are placed in a directory listed in @XML::Parser::Expat::Encoding_Path, +then they are automatically loaded by the XML::Parser::Expat::load_encoding +function as needed. Otherwise you may load what you need directly by +explicitly calling this function. + +These maps were generated by a perl script that comes with the module +XML::Encoding, compile_encoding, from XML formatted encoding maps that +are distributed with that module. These XML encoding maps were generated +in turn with a different script, domap, from mapping information contained +on the Unicode version 2.0 CD-ROM. This CD-ROM comes with the Unicode +Standard reference manual and can be ordered from the Unicode Consortium +at http://www.unicode.org. The identical information is available on the +internet at ftp://ftp.unicode.org/Public/MAPPINGS. + +See the encoding.h header in the Expat sub-directory for a description of +the structure of these files. 
+ +Clark Cooper +December 12, 1998 + +================================================================ + +Contributed maps + +This distribution contains four contributed encodings from MURATA Makoto + that are variations on the encoding +commonly called Shift_JIS: + +x-sjis-cp932.enc +x-sjis-jdk117.enc +x-sjis-jisx0221.enc +x-sjis-unicode.enc (This is the same encoding as the shift_jis.enc that + was distributed with this module in version 2.17) + +Please read his message (Japanese_Encodings.msg) about why these are here +and why I've removed the shift_jis.enc encoding. + +We also have two contributed encodings that are variations of the EUC-JP +encoding from Yoshida Masato : + +x-euc-jp-jisx0221.enc +x-euc-jp-unicode.enc + +The comments that MURATA Makoto made in his message apply to these +encodings too. + +KangChan Lee supplied the euc-kr encoding. + +Clark Cooper +December 26, 1998 diff --git a/Parser/Encodings/big5.enc b/Parser/Encodings/big5.enc new file mode 100644 index 0000000..94b2bd4 Binary files /dev/null and b/Parser/Encodings/big5.enc differ diff --git a/Parser/Encodings/euc-kr.enc b/Parser/Encodings/euc-kr.enc new file mode 100644 index 0000000..3da8a13 Binary files /dev/null and b/Parser/Encodings/euc-kr.enc differ diff --git a/Parser/Encodings/ibm866.enc b/Parser/Encodings/ibm866.enc new file mode 100644 index 0000000..71b4fb5 Binary files /dev/null and b/Parser/Encodings/ibm866.enc differ diff --git a/Parser/Encodings/iso-8859-2.enc b/Parser/Encodings/iso-8859-2.enc new file mode 100644 index 0000000..d320d7f Binary files /dev/null and b/Parser/Encodings/iso-8859-2.enc differ diff --git a/Parser/Encodings/iso-8859-3.enc b/Parser/Encodings/iso-8859-3.enc new file mode 100644 index 0000000..ba48378 Binary files /dev/null and b/Parser/Encodings/iso-8859-3.enc differ diff --git a/Parser/Encodings/iso-8859-4.enc b/Parser/Encodings/iso-8859-4.enc new file mode 100644 index 0000000..0294a24 Binary files /dev/null and b/Parser/Encodings/iso-8859-4.enc 
differ diff --git a/Parser/Encodings/iso-8859-5.enc b/Parser/Encodings/iso-8859-5.enc new file mode 100644 index 0000000..6dbd169 Binary files /dev/null and b/Parser/Encodings/iso-8859-5.enc differ diff --git a/Parser/Encodings/iso-8859-7.enc b/Parser/Encodings/iso-8859-7.enc new file mode 100644 index 0000000..f2b2215 Binary files /dev/null and b/Parser/Encodings/iso-8859-7.enc differ diff --git a/Parser/Encodings/iso-8859-8.enc b/Parser/Encodings/iso-8859-8.enc new file mode 100644 index 0000000..f211bd5 Binary files /dev/null and b/Parser/Encodings/iso-8859-8.enc differ diff --git a/Parser/Encodings/iso-8859-9.enc b/Parser/Encodings/iso-8859-9.enc new file mode 100644 index 0000000..fdc574b Binary files /dev/null and b/Parser/Encodings/iso-8859-9.enc differ diff --git a/Parser/Encodings/koi8-r.enc b/Parser/Encodings/koi8-r.enc new file mode 100644 index 0000000..326cae8 Binary files /dev/null and b/Parser/Encodings/koi8-r.enc differ diff --git a/Parser/Encodings/windows-1250.enc b/Parser/Encodings/windows-1250.enc new file mode 100644 index 0000000..d4a64b5 Binary files /dev/null and b/Parser/Encodings/windows-1250.enc differ diff --git a/Parser/Encodings/windows-1251.enc b/Parser/Encodings/windows-1251.enc new file mode 100644 index 0000000..e64960c Binary files /dev/null and b/Parser/Encodings/windows-1251.enc differ diff --git a/Parser/Encodings/windows-1252.enc b/Parser/Encodings/windows-1252.enc new file mode 100644 index 0000000..ab2d57c Binary files /dev/null and b/Parser/Encodings/windows-1252.enc differ diff --git a/Parser/Encodings/windows-1255.enc b/Parser/Encodings/windows-1255.enc new file mode 100644 index 0000000..87ee299 Binary files /dev/null and b/Parser/Encodings/windows-1255.enc differ diff --git a/Parser/Encodings/x-euc-jp-jisx0221.enc b/Parser/Encodings/x-euc-jp-jisx0221.enc new file mode 100644 index 0000000..ca79c07 Binary files /dev/null and b/Parser/Encodings/x-euc-jp-jisx0221.enc differ diff --git 
a/Parser/Encodings/x-euc-jp-unicode.enc b/Parser/Encodings/x-euc-jp-unicode.enc
new file mode 100644
index 0000000..34d4d0d
Binary files /dev/null and b/Parser/Encodings/x-euc-jp-unicode.enc differ
diff --git a/Parser/Encodings/x-sjis-cp932.enc b/Parser/Encodings/x-sjis-cp932.enc
new file mode 100644
index 0000000..c2a6bc4
Binary files /dev/null and b/Parser/Encodings/x-sjis-cp932.enc differ
diff --git a/Parser/Encodings/x-sjis-jdk117.enc b/Parser/Encodings/x-sjis-jdk117.enc
new file mode 100644
index 0000000..b6c2c07
Binary files /dev/null and b/Parser/Encodings/x-sjis-jdk117.enc differ
diff --git a/Parser/Encodings/x-sjis-jisx0221.enc b/Parser/Encodings/x-sjis-jisx0221.enc
new file mode 100644
index 0000000..cbb2db5
Binary files /dev/null and b/Parser/Encodings/x-sjis-jisx0221.enc differ
diff --git a/Parser/Encodings/x-sjis-unicode.enc b/Parser/Encodings/x-sjis-unicode.enc
new file mode 100644
index 0000000..6f88a06
Binary files /dev/null and b/Parser/Encodings/x-sjis-unicode.enc differ
diff --git a/Parser/LWPExternEnt.pl b/Parser/LWPExternEnt.pl
new file mode 100644
index 0000000..d0c940b
--- /dev/null
+++ b/Parser/LWPExternEnt.pl
@@ -0,0 +1,71 @@
+# LWPExternEnt.pl
+#
+# Copyright (c) 2000 Clark Cooper
+# All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the same terms as Perl itself.
+
+package XML::Parser;
+
+use URI;
+use URI::file;
+use LWP;
+
+##
+## Note that this external entity handler reads the entire entity into
+## memory, so it will choke on huge ones. It would be really nice if
+## LWP::UserAgent optionally returned us an IO::Handle.
+##
+
+sub lwp_ext_ent_handler {    # fetch an external entity; returns its content, or undef on error
+  my ($xp, $base, $sys) = @_;	# We don't use public id
+
+  my $uri;
+
+  if (defined $base) {
+    # Base may have been set by parsefile, which is agnostic about
+    # whether it's a file or URI.
+    my $base_uri = URI->new($base);
+    unless (defined $base_uri->scheme) {    # no scheme: resolve against the cwd
+      $base_uri = URI->new_abs($base_uri, URI::file->cwd);
+    }
+
+    $uri = URI->new_abs($sys, $base_uri);
+  }
+  else {
+    $uri = URI->new($sys);
+    unless (defined $uri->scheme) {    # no scheme: resolve against the cwd
+      $uri = URI->new_abs($uri, URI::file->cwd);
+    }
+  }
+
+  my $ua = $xp->{_lwpagent};    # one user agent is cached per parser object
+  unless (defined $ua) {
+    $ua = $xp->{_lwpagent} = LWP::UserAgent->new();
+    $ua->env_proxy();
+  }
+
+  my $req = HTTP::Request->new('GET', $uri);
+
+  my $res = $ua->request($req);
+  if ($res->is_error) {
+    $xp->{ErrorMessage} .= "\n" . $res->status_line . " $uri";
+    return undef;
+  }
+
+  $xp->{_BaseStack} ||= [];    # previous bases, restored by lwp_ext_ent_cleanup
+  push(@{$xp->{_BaseStack}}, $base);
+
+  $xp->base($uri);
+
+  return $res->content;
+}  # End lwp_ext_ent_handler
+
+sub lwp_ext_ent_cleanup {    # restore the base saved by lwp_ext_ent_handler
+  my ($xp) = @_;
+
+  $xp->base(pop(@{$xp->{_BaseStack}}));
+}  # End lwp_ext_ent_cleanup
+
+1;
diff --git a/Parser/Style/Debug.pm b/Parser/Style/Debug.pm
new file mode 100644
index 0000000..7d6b07e
--- /dev/null
+++ b/Parser/Style/Debug.pm
@@ -0,0 +1,52 @@
+# $Id: Debug.pm,v 1.1 2003-07-27 16:07:49 matt Exp $
+
+package XML::Parser::Style::Debug;
+use strict;
+
+sub Start {
+    my $expat = shift;
+    my $tag = shift;
+    print STDERR "@{$expat->{Context}} \\\\ (@_)\n";
+}
+
+sub End {
+    my $expat = shift;
+    my $tag = shift;
+    print STDERR "@{$expat->{Context}} //\n";
+}
+
+sub Char {
+    my $expat = shift;
+    my $text = shift;
+    $text =~ s/([\x80-\xff])/sprintf "#x%X;", ord $1/eg;
+    $text =~ s/([\t\n])/sprintf "#%d;", ord $1/eg;
+    print STDERR "@{$expat->{Context}} || $text\n";
+}
+
+sub Proc {
+    my $expat = shift;
+    my $target = shift;
+    my $text = shift;
+    my @foo = @{$expat->{Context}};
+    print STDERR "@foo $target($text)\n";
+}
+
+1;
+__END__
+
+=head1 NAME
+
+XML::Parser::Style::Debug - Debug style for XML::Parser
+
+=head1 SYNOPSIS
+
+  use XML::Parser;
+  my $p = XML::Parser->new(Style => 'Debug');
+  $p->parsefile('foo.xml');
+
+=head1 DESCRIPTION
+
+This just prints out the document in outline form to
STDERR. Nothing special is
+returned by parse.
+
+=cut
\ No newline at end of file
diff --git a/Parser/Style/Objects.pm b/Parser/Style/Objects.pm
new file mode 100644
index 0000000..c10e185
--- /dev/null
+++ b/Parser/Style/Objects.pm
@@ -0,0 +1,78 @@
+# $Id: Objects.pm,v 1.1 2003-08-18 20:20:51 matt Exp $
+
+package XML::Parser::Style::Objects;
+use strict;
+
+sub Init {    # Before parsing: start with an empty tree and an empty list stack.
+    my ($expat) = @_;
+    $expat->{Lists} = [];
+    $expat->{Tree}  = $expat->{Curlist} = [];
+}
+
+sub Start {    # Element start: create the node object and descend into it.
+    my ($expat, $tag, @attrs) = @_;
+    my $kids = [];
+
+    # Node class is "<Pkg>::<element name>"; attributes become hash entries.
+    my $node = bless { @attrs, Kids => $kids }, $expat->{Pkg} . "::$tag";
+    push @{ $expat->{Lists} },   $expat->{Curlist};
+    push @{ $expat->{Curlist} }, $node;
+    $expat->{Curlist} = $kids;
+}
+
+sub End {    # Element end: climb back to the parent's child list.
+    my ($expat) = @_;    # the tag name (second argument) is not needed
+    my $stack = $expat->{Lists};
+    $expat->{Curlist} = pop @$stack;
+}
+
+sub Char {    # Text: extend a trailing Characters node or append a new one.
+    my ($expat, $text) = @_;
+    my $class    = $expat->{Pkg} . '::Characters';
+    my $siblings = $expat->{Curlist};
+    my $last     = $siblings->[-1];    # undef when the list is empty
+
+    # Consecutive text callbacks are merged into a single Characters node.
+    if (ref($last) eq $class) {
+        $last->{Text} .= $text;
+    }
+    else {
+        push @$siblings, bless { Text => $text }, $class;
+    }
+}
+
+sub Final {    # Parse finished: drop the scratch state and hand back the tree.
+    my ($expat) = @_;
+    delete @{$expat}{qw(Curlist Lists)};
+    my $tree = $expat->{Tree};
+    return $tree;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+XML::Parser::Style::Objects
+
+=head1 SYNOPSIS
+
+  use XML::Parser;
+  my $p = XML::Parser->new(Style => 'Objects', Pkg => 'MyNode');
+  my $tree = $p->parsefile('foo.xml');
+
+=head1 DESCRIPTION
+
+This module implements XML::Parser's Objects style parser.
+
+This is similar to the Tree style, except that a hash object is created for
+each element. The corresponding object will be in the class whose name
+is created by appending "::" and the element name to the package set with
+the Pkg option. Non-markup text will be in the ::Characters class. The
+contents of the corresponding object will be in an anonymous array that
+is the value of the Kids property for that object.
+
+=head1 SEE ALSO
+
+L<XML::Parser::Style::Tree>
+
+=cut
\ No newline at end of file
diff --git a/Parser/Style/Stream.pm b/Parser/Style/Stream.pm
new file mode 100644
index 0000000..2907e5f
--- /dev/null
+++ b/Parser/Style/Stream.pm
@@ -0,0 +1,184 @@
+# $Id: Stream.pm,v 1.1 2003-07-27 16:07:49 matt Exp $
+
+package XML::Parser::Style::Stream;
+use strict;
+
+# This style invented by Tim Bray
+
+sub Init {    # reset the text buffer, then call <Pkg>::StartDocument if defined
+  no strict 'refs';    # handlers are looked up by their string name
+  my $expat = shift;
+  $expat->{Text} = '';
+  my $sub = $expat->{Pkg} ."::StartDocument";
+  &$sub($expat)
+    if defined(&$sub);
+}
+
+sub Start {    # flush pending text, build the start tag in $_, dispatch to StartTag
+  no strict 'refs';
+  my $expat = shift;
+  my $type = shift;
+
+  doText($expat);
+  $_ = "<$type";
+
+  %_ = @_;    # attributes are exposed to the handler through the global %_
+  while (@_) {
+    $_ .= ' ' . shift() . '="' . shift() . '"';
+  }
+  $_ .= '>';
+
+  my $sub = $expat->{Pkg} . "::StartTag";
+  if (defined(&$sub)) {
+    &$sub($expat, $type);
+  } else {
+    print;    # no handler: echo the tag held in $_
+  }
+}
+
+sub End {    # flush pending text, build the end tag in $_, dispatch to EndTag
+  no strict 'refs';
+  my $expat = shift;
+  my $type = shift;
+
+  # Set right context for Text handler
+  push(@{$expat->{Context}}, $type);
+  doText($expat);
+  pop(@{$expat->{Context}});
+
+  $_ = "</$type>";    # the POD promises $_ holds a copy of the end tag
+
+  my $sub = $expat->{Pkg} . "::EndTag";
+  if (defined(&$sub)) {
+    &$sub($expat, $type);
+  } else {
+    print;    # no handler: echo the end tag held in $_
+  }
+}
+
+sub Char {    # only accumulate; doText delivers the text at the next event
+  my $expat = shift;
+  $expat->{Text} .= shift;
+}
+
+sub Proc {    # flush pending text, build the PI in $_, dispatch to PI
+  no strict 'refs';
+  my $expat = shift;
+  my $target = shift;
+  my $text = shift;
+
+  doText($expat);
+
+  $_ = "<?$target $text?>";    # the POD promises $_ holds a copy of the PI
+
+  my $sub = $expat->{Pkg} . "::PI";
+  if (defined(&$sub)) {
+    &$sub($expat, $target, $text);
+  } else {
+    print;    # no handler: echo the PI held in $_
+  }
+}
+
+sub Final {    # call <Pkg>::EndDocument if defined
+  no strict 'refs';
+  my $expat = shift;
+  my $sub = $expat->{Pkg} . "::EndDocument";
+  &$sub($expat)
+    if defined(&$sub);
+}
+
+sub doText {    # hand accumulated text to <Pkg>::Text via $_ (or print it), then reset
+  no strict 'refs';
+  my $expat = shift;
+  $_ = $expat->{Text};
+
+  if (length($_)) {    # nothing is dispatched for an empty buffer
+    my $sub = $expat->{Pkg} . "::Text";
+    if (defined(&$sub)) {
+      &$sub($expat);
+    } else {
+      print;
+    }
+
+    $expat->{Text} = '';
+  }
+}
+
+1;
+__END__
+
+=head1 NAME
+
+XML::Parser::Style::Stream - Stream style for XML::Parser
+
+=head1 SYNOPSIS
+
+  use XML::Parser;
+  my $p = XML::Parser->new(Style => 'Stream', Pkg => 'MySubs');
+  $p->parsefile('foo.xml');
+
+  {
+    package MySubs;
+
+    sub StartTag {
+      my ($e, $name) = @_;
+      # do something with start tags
+    }
+
+    sub EndTag {
+      my ($e, $name) = @_;
+      # do something with end tags
+    }
+
+    sub Text {
+      my ($e) = @_;
+      # do something with text nodes (the text is in $_)
+    }
+  }
+
+=head1 DESCRIPTION
+
+This style uses the Pkg option to find subs in a given package to call for each event.
+If none of the subs that this
+style looks for is there, then the effect of parsing with this style is
+to print a canonical copy of the document without comments or declarations.
+All the subs receive as their 1st parameter the Expat instance for the
+document they're parsing.
+
+It looks for the following routines:
+
+=over 4
+
+=item * StartDocument
+
+Called at the start of the parse.
+
+=item * StartTag
+
+Called for every start tag with a second parameter of the element type. The $_
+variable will contain a copy of the tag and the %_ variable will contain
+attribute values supplied for that element.
+
+=item * EndTag
+
+Called for every end tag with a second parameter of the element type. The $_
+variable will contain a copy of the end tag.
+
+=item * Text
+
+Called just before start or end tags with accumulated non-markup text in
+the $_ variable.
+
+=item * PI
+
+Called for processing instructions. The $_ variable will contain a copy of
+the PI and the target and data are sent as 2nd and 3rd parameters
+respectively.
+
+=item * EndDocument
+
+Called at conclusion of the parse.
+
+=back
+
+=cut
\ No newline at end of file
diff --git a/Parser/Style/Subs.pm b/Parser/Style/Subs.pm
new file mode 100644
index 0000000..b2b158a
--- /dev/null
+++ b/Parser/Style/Subs.pm
@@ -0,0 +1,58 @@
+# $Id: Subs.pm,v 1.1 2003-07-27 16:07:49 matt Exp $
+package XML::Parser::Style::Subs;
+use strict;
+
+sub Start {    # element start: call <Pkg>::<tag>($expat, $tag, attrs...)
+    no strict 'refs';    # the handler is called through its string name
+    my $expat = shift;
+    my $tag = shift;
+    my $sub = $expat->{Pkg} . "::$tag";
+    eval { &$sub($expat, $tag, @_) };    # NOTE: hides a missing sub AND any error the sub raises
+}
+
+sub End {    # element end: call <Pkg>::<tag>_($expat, $tag)
+    no strict 'refs';    # the handler is called through its string name
+    my $expat = shift;
+    my $tag = shift;
+    my $sub = $expat->{Pkg} . "::${tag}_";
+    eval { &$sub($expat, $tag) };    # NOTE: hides a missing sub AND any error the sub raises
+}
+
+1;
+__END__
+
+=head1 NAME
+
+XML::Parser::Style::Subs
+
+=head1 SYNOPSIS
+
+  use XML::Parser;
+  my $p = XML::Parser->new(Style => 'Subs', Pkg => 'MySubs');
+  $p->parsefile('foo.xml');
+
+  {
+    package MySubs;
+
+    sub foo {
+      # start of foo tag
+    }
+
+    sub foo_ {
+      # end of foo tag
+    }
+  }
+
+=head1 DESCRIPTION
+
+Each time an element starts, a sub by that name in the package specified
+by the Pkg option is called with the same parameters that the Start
+handler gets called with.
+
+Each time an element ends, a sub with that name appended with an underscore
+("_"), is called with the same parameters that the End handler gets called
+with.
+
+Nothing special is returned by parse.
+
+=cut
\ No newline at end of file
diff --git a/Parser/Style/Tree.pm b/Parser/Style/Tree.pm
new file mode 100644
index 0000000..f8df3bd
--- /dev/null
+++ b/Parser/Style/Tree.pm
@@ -0,0 +1,90 @@
+# $Id: Tree.pm,v 1.2 2003-07-31 07:54:51 matt Exp $
+package XML::Parser::Style::Tree;
+use strict;
+$XML::Parser::Built_In_Styles{Tree} = 1;
+
+sub Init {    # before parsing: empty tree, empty stack of enclosing lists
+    my $expat = shift;
+    $expat->{Lists} = [];
+    $expat->{Curlist} = $expat->{Tree} = [];
+}
+
+sub Start {    # element start: append "tag => [ {attrs} ]" and descend into it
+    my $expat = shift;
+    my $tag = shift;
+    my $newlist = [ { @_ } ];    # first slot of every content list is the attribute hash
+    push @{ $expat->{Lists} }, $expat->{Curlist};
+    push @{ $expat->{Curlist} }, $tag => $newlist;
+    $expat->{Curlist} = $newlist;
+}
+
+sub End {    # element end: return to the enclosing element's content list
+    my $expat = shift;
+    my $tag = shift;
+    $expat->{Curlist} = pop @{ $expat->{Lists} };
+}
+
+sub Char {    # text: extend a trailing "0 => text" pair, or append a new one
+    my $expat = shift;
+    my $text = shift;
+    my $clist = $expat->{Curlist};
+    my $pos = $#$clist;
+
+    if ($pos > 0 and $clist->[$pos - 1] eq '0') {    # previous pair is a text node
+        $clist->[$pos] .= $text;
+    } else {
+        push @$clist, 0 => $text;
+    }
+}
+
+sub Final {    # parse finished: drop scratch state, return the tree
+    my $expat = shift;
+    delete $expat->{Curlist};
+    delete $expat->{Lists};
+    $expat->{Tree};
+}
+
+1;
+__END__
+
+=head1 NAME
+
+XML::Parser::Style::Tree
+
+=head1 SYNOPSIS
+
+  use XML::Parser;
+  my $p = XML::Parser->new(Style => 'Tree');
+  my $tree = $p->parsefile('foo.xml');
+
+=head1 DESCRIPTION
+
+This module implements XML::Parser's Tree style parser.
+
+When parsing a document, C<parse()> will return a parse tree for the
+document. Each node in the tree
+takes the form of a tag, content pair. Text nodes are represented with
+a pseudo-tag of "0" and the string that is their content. For elements,
+the content is an array reference. The first item in the array is a
+(possibly empty) hash reference containing attributes. The remainder of
+the array is a sequence of tag-content pairs representing the content
+of the element.
+ +So for example the result of parsing: + + <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> + +would be: + Tag Content + ================================================================== + [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], + bar, [ {}, 0, "Howdy", ref, [{}]], + 0, "do" + ] + ] + +The root document "foo", has 3 children: a "head" element, a "bar" +element and the text "do". After the empty attribute hash, these are +represented in its contents by 3 tag-content pairs. + +=cut -- cgit v1.2.1