summaryrefslogtreecommitdiff
path: root/src/zope/tal/xmlparser.py
blob: 8e52a02e8af9b758f43b4b2d7bf8074e42c8280a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Generic Expat-based XML parser base class.

This creates a parser with namespace processing enabled.

$Id$
"""
import logging


class XMLParser(object):
    """Base class wrapping a namespace-aware expat parser.

    Subclasses receive parse events by defining methods named after expat
    handlers (see ``handler_names``); every such method found on the
    instance is registered on the underlying parser by ``__init__``.
    """

    # When true, the parser is asked to report attributes as a flat
    # ``[name, value, ...]`` list instead of a dictionary.
    ordered_attributes = 0

    # Names of the expat handler attributes looked up on the instance.
    handler_names = [
        "StartElementHandler",
        "EndElementHandler",
        "ProcessingInstructionHandler",
        "CharacterDataHandler",
        "UnparsedEntityDeclHandler",
        "NotationDeclHandler",
        "StartNamespaceDeclHandler",
        "EndNamespaceDeclHandler",
        "CommentHandler",
        "StartCdataSectionHandler",
        "EndCdataSectionHandler",
        "DefaultHandler",
        "DefaultHandlerExpand",
        "NotStandaloneHandler",
        "ExternalEntityRefHandler",
        "XmlDeclHandler",
        "StartDoctypeDeclHandler",
        "EndDoctypeDeclHandler",
        "ElementDeclHandler",
        "AttlistDeclHandler",
        ]

    def __init__(self, encoding=None):
        """Create the expat parser and hook up the handler methods.

        ``encoding`` is passed through to ``createParser``.  Failures to
        configure the parser are logged rather than raised.
        """
        self.parser = p = self.createParser(encoding)
        if self.ordered_attributes:
            try:
                p.ordered_attributes = self.ordered_attributes
            except AttributeError:
                # ``logging.warn`` is a deprecated alias that no longer
                # exists in recent Python versions; ``warning`` works
                # everywhere.
                logging.warning("TAL.XMLParser: Can't set ordered_attributes")
                self.ordered_attributes = 0
        for name in self.handler_names:
            method = getattr(self, name, None)
            if method is None:
                continue
            try:
                setattr(p, name, method)
            except AttributeError:
                logging.error("TAL.XMLParser: Can't set "
                              "expat handler %s", name)

    def createParser(self, encoding=None):
        """Return a new expat parser with namespace processing enabled.

        A single space is used as the separator between a namespace URI
        and the local name.  As a side effect the module-level name
        ``XMLParseError`` is bound to ``expat.ExpatError``.
        """
        global XMLParseError
        from xml.parsers import expat
        XMLParseError = expat.ExpatError
        return expat.ParserCreate(encoding, ' ')

    def parseFile(self, filename):
        """Parse the XML document stored in the file named ``filename``."""
        # Binary mode hands expat the raw bytes so that it can honour the
        # document's own encoding declaration; the ``with`` block makes
        # sure the file is closed even when parsing fails.
        with open(filename, 'rb') as stream:
            self.parseStream(stream)

    def parseString(self, s):
        """Parse ``s`` (text or encoded bytes) as a complete document."""
        # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3,
        # which avoids referencing a builtin that only exists on Python 2.
        text_type = type(u"")
        if isinstance(s, text_type):
            # Expat cannot deal with unicode strings, only with
            # encoded ones.  Also, its range of encodings is rather
            # limited, UTF-8 is the safest bet here.
            s = s.encode('utf-8')
        self.parser.Parse(s, 1)

    def parseURL(self, url):
        """Fetch ``url`` and parse the response body as an XML document."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        stream = urlopen(url)
        try:
            self.parseStream(stream)
        finally:
            # Release the connection whether or not parsing succeeded.
            stream.close()

    def parseStream(self, stream):
        """Parse a document by reading it from the file-like ``stream``."""
        self.parser.ParseFile(stream)

    def parseFragment(self, s, end=0):
        """Feed the chunk ``s`` to the parser.

        Pass a true ``end`` with the last chunk to finish the document.
        """
        self.parser.Parse(s, end)

    def getpos(self):
        """Return the parser's current ``(line, column)`` position."""
        # Apparently ErrorLineNumber and ErrorColumnNumber contain the
        # current position even when there was no error.  This contradicts
        # the official documentation[1], but expat.h[2] contains the
        # following definition:
        #
        #   /* For backwards compatibility with previous versions. */
        #   #define XML_GetErrorLineNumber   XML_GetCurrentLineNumber
        #
        # [1] http://python.org/doc/current/lib/xmlparser-objects.html
        # [2] http://cvs.sourceforge.net/viewcvs.py/expat/expat/lib/expat.h
        return (self.parser.ErrorLineNumber, self.parser.ErrorColumnNumber)