summaryrefslogtreecommitdiff
path: root/src/lxml/dtd.pxi
blob: d1913b426e86569f923de1be96c678f7eae299e1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
# support for DTD validation
from lxml.includes cimport dtdvalid

class DTDError(LxmlError):
    u"""Common base class of all DTD related errors.
    """

class DTDParseError(DTDError):
    u"""Raised when a DTD cannot be parsed.
    """

class DTDValidateError(DTDError):
    u"""Raised when validating an XML document against a DTD fails
    with an error.
    """

cdef inline int _assertValidDTDNode(node, void *c_node) except -1:
    """
    Check that a DTD declaration proxy wraps a C node.

    ``node`` is the Python proxy object; it is only used to build the
    error message.  ``c_node`` is the C pointer stored in the proxy.

    Returns 0 if the pointer is set and raises AssertionError if it is
    NULL.
    """
    # Raise explicitly rather than via an ``assert`` statement: assert
    # statements are skipped when assertions are disabled (e.g. under
    # "python -O"), and the callers dereference the pointer right after
    # this check, so a skipped check would turn into a NULL dereference.
    if c_node is NULL:
        raise AssertionError(u"invalid DTD proxy at %s" % id(node))
    return 0


@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementContentDecl:
    u"""Read-only proxy for one node of an element content declaration.
    """
    # the DTD object this proxy was obtained from
    cdef DTD _dtd
    # the wrapped C content node; must be set before any property is read
    cdef tree.xmlElementContent* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.type, self.occur, id(self))

    property name:
        # name stored on the content node, or None if it has none
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.name is NULL:
                return None
            return funicode(self._c_node.name)

    property type:
        # content node type as a string: "pcdata", "element", "seq" or
        # "or"; None for any other value
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            cdef int c_type = self._c_node.type
            if c_type == tree.XML_ELEMENT_CONTENT_PCDATA:
                return "pcdata"
            if c_type == tree.XML_ELEMENT_CONTENT_ELEMENT:
                return "element"
            if c_type == tree.XML_ELEMENT_CONTENT_SEQ:
                return "seq"
            if c_type == tree.XML_ELEMENT_CONTENT_OR:
                return "or"
            return None

    property occur:
        # occurrence indicator as a string: "once", "opt", "mult" or
        # "plus"; None for any other value
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            cdef int c_occur = self._c_node.ocur
            if c_occur == tree.XML_ELEMENT_CONTENT_ONCE:
                return "once"
            if c_occur == tree.XML_ELEMENT_CONTENT_OPT:
                return "opt"
            if c_occur == tree.XML_ELEMENT_CONTENT_MULT:
                return "mult"
            if c_occur == tree.XML_ELEMENT_CONTENT_PLUS:
                return "plus"
            return None

    property left:
        # proxy for the first child (``c1``) of this node, or None
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            c_child = self._c_node.c1
            if not c_child:
                return None
            child = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
            child._dtd = self._dtd
            child._c_node = <tree.xmlElementContent*>c_child
            return child

    property right:
        # proxy for the second child (``c2``) of this node, or None
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            c_child = self._c_node.c2
            if not c_child:
                return None
            child = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
            child._dtd = self._dtd
            child._c_node = <tree.xmlElementContent*>c_child
            return child


@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDAttributeDecl:
    u"""Read-only proxy for an attribute declaration of a DTD.
    """
    # the DTD object this proxy was obtained from
    cdef DTD _dtd
    # the wrapped C attribute declaration; must be set before use
    cdef tree.xmlAttribute* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.elemname, self.prefix, self.type,
            self.default, self.default_value, id(self))

    property name:
        # attribute name, or None if not set
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.name is NULL:
                return None
            return funicode(self._c_node.name)

    property elemname:
        # name of the element the attribute is declared for, or None
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.elem is NULL:
                return None
            return funicode(self._c_node.elem)

    property prefix:
        # prefix of the attribute, or None if not set
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.prefix is NULL:
                return None
            return funicode(self._c_node.prefix)

    property type:
        # declared attribute type as a lower case string ("cdata", "id",
        # ...); None for any unknown value
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            cdef int c_type = self._c_node.atype
            if c_type == tree.XML_ATTRIBUTE_CDATA:
                return "cdata"
            if c_type == tree.XML_ATTRIBUTE_ID:
                return "id"
            if c_type == tree.XML_ATTRIBUTE_IDREF:
                return "idref"
            if c_type == tree.XML_ATTRIBUTE_IDREFS:
                return "idrefs"
            if c_type == tree.XML_ATTRIBUTE_ENTITY:
                return "entity"
            if c_type == tree.XML_ATTRIBUTE_ENTITIES:
                return "entities"
            if c_type == tree.XML_ATTRIBUTE_NMTOKEN:
                return "nmtoken"
            if c_type == tree.XML_ATTRIBUTE_NMTOKENS:
                return "nmtokens"
            if c_type == tree.XML_ATTRIBUTE_ENUMERATION:
                return "enumeration"
            if c_type == tree.XML_ATTRIBUTE_NOTATION:
                return "notation"
            return None

    property default:
        # default declaration kind: "none", "required", "implied" or
        # "fixed"; None for any unknown value
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            cdef int c_default = self._c_node.def_
            if c_default == tree.XML_ATTRIBUTE_NONE:
                return "none"
            if c_default == tree.XML_ATTRIBUTE_REQUIRED:
                return "required"
            if c_default == tree.XML_ATTRIBUTE_IMPLIED:
                return "implied"
            if c_default == tree.XML_ATTRIBUTE_FIXED:
                return "fixed"
            return None

    property default_value:
        # declared default value, or None if there is none
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.defaultValue is NULL:
                return None
            return funicode(self._c_node.defaultValue)

    def itervalues(self):
        u"""Iterate over the names in the enumeration list of this
        attribute declaration.
        """
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlEnumeration *c_value = self._c_node.tree
        while c_value is not NULL:
            yield funicode(c_value.name)
            c_value = c_value.next

    def values(self):
        u"""Return the names yielded by ``itervalues()`` as a list.
        """
        return list(self.itervalues())


@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementDecl:
    u"""Read-only proxy for an element declaration of a DTD.
    """
    # the DTD object this proxy was obtained from
    cdef DTD _dtd
    # the wrapped C element declaration; must be set before use
    cdef tree.xmlElement* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (
            cls.__module__, cls.__name__,
            self.name, self.prefix, self.type, id(self))

    property name:
        # element name, or None if not set
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.name is NULL:
                return None
            return funicode(self._c_node.name)

    property prefix:
        # element prefix, or None if not set
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.prefix is NULL:
                return None
            return funicode(self._c_node.prefix)

    property type:
        # element type as a string: "undefined", "empty", "any", "mixed"
        # or "element"; None for any other value
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            cdef int c_type = self._c_node.etype
            if c_type == tree.XML_ELEMENT_TYPE_UNDEFINED:
                return "undefined"
            if c_type == tree.XML_ELEMENT_TYPE_EMPTY:
                return "empty"
            if c_type == tree.XML_ELEMENT_TYPE_ANY:
                return "any"
            if c_type == tree.XML_ELEMENT_TYPE_MIXED:
                return "mixed"
            if c_type == tree.XML_ELEMENT_TYPE_ELEMENT:
                return "element"
            return None

    property content:
        # proxy for the root of the content declaration, or None if the
        # element declaration has no content pointer
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            cdef tree.xmlElementContent *c_content = self._c_node.content
            if not c_content:
                return None
            decl = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
            decl._dtd = self._dtd
            decl._c_node = c_content
            return decl

    def iterattributes(self):
        u"""Iterate over the attribute declarations linked to this
        element declaration, following their ``nexth`` chain.
        """
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlAttribute *c_attr = self._c_node.attributes
        while c_attr is not NULL:
            decl = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl)
            decl._dtd = self._dtd
            decl._c_node = c_attr
            yield decl
            c_attr = c_attr.nexth

    def attributes(self):
        u"""Return the declarations yielded by ``iterattributes()`` as
        a list.
        """
        return list(self.iterattributes())


@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDEntityDecl:
    u"""Read-only proxy for an entity declaration of a DTD.
    """
    # the DTD object this proxy was obtained from
    cdef DTD _dtd
    # the wrapped C entity declaration; must be set before use
    cdef tree.xmlEntity* _c_node

    def __repr__(self):
        cls = self.__class__
        return "<%s.%s object name=%r at 0x%x>" % (
            cls.__module__, cls.__name__, self.name, id(self))

    property name:
        # entity name, or None if not set
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.name is NULL:
                return None
            return funicode(self._c_node.name)

    property orig:
        # value of the ``orig`` field of the entity, or None if not set
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.orig is NULL:
                return None
            return funicode(self._c_node.orig)

    property content:
        # value of the ``content`` field of the entity, or None if not set
        def __get__(self):
            _assertValidDTDNode(self, self._c_node)
            if self._c_node.content is NULL:
                return None
            return funicode(self._c_node.content)


################################################################################
# DTD

cdef class DTD(_Validator):
    u"""DTD(self, file=None, external_id=None)
    A DTD validator.

    Can load from filesystem directly given a filename or file-like object.
    Alternatively, pass the keyword parameter ``external_id`` to load from a
    catalog.

    Raises ``DTDParseError`` if neither ``file`` nor ``external_id`` is
    given, if ``file`` is neither a string nor an object with a ``read``
    attribute, or if parsing does not produce a DTD.
    """
    # the parsed DTD; freed in __dealloc__
    cdef tree.xmlDtd* _c_dtd
    def __init__(self, file=None, *, external_id=None):
        _Validator.__init__(self)
        if file is not None:
            if _isString(file):
                # filename: let libxml2 load and parse the file itself
                file = _encodeFilename(file)
                with self._error_log:
                    self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file))
            elif hasattr(file, 'read'):
                # file-like object: read the DTD through its read() method
                self._c_dtd = _parseDtdFromFilelike(file)
            else:
                raise DTDParseError, u"file must be a filename or file-like object"
        elif external_id is not None:
            # no file given: look the DTD up by its external ID
            with self._error_log:
                self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id, NULL)
        else:
            raise DTDParseError, u"either filename or external ID required"

        # a NULL result from the parser calls above means parsing failed
        if self._c_dtd is NULL:
            raise DTDParseError(
                self._error_log._buildExceptionMessage(u"error parsing DTD"),
                self._error_log)

    property name:
       # value of the ``name`` field of the DTD; None if no DTD is set
       def __get__(self):
           if self._c_dtd is NULL:
               return None
           return funicodeOrNone(self._c_dtd.name)

    property external_id:
       # value of the ``ExternalID`` field of the DTD; None if no DTD is set
       def __get__(self):
           if self._c_dtd is NULL:
               return None
           return funicodeOrNone(self._c_dtd.ExternalID)

    property system_url:
       # value of the ``SystemID`` field of the DTD; None if no DTD is set
       def __get__(self):
           if self._c_dtd is NULL:
               return None
           return funicodeOrNone(self._c_dtd.SystemID)

    def iterelements(self):
        u"""Iterate over the element declarations among the children of
        the DTD, yielding one proxy object per declaration.

        Yields nothing if no DTD is set.
        """
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            # skip all children that are not element declarations
            if c_node.type == tree.XML_ELEMENT_DECL:
                node = _DTDElementDecl()
                node._dtd = self
                node._c_node = <tree.xmlElement*>c_node
                yield node
            c_node = c_node.next

    def elements(self):
        u"""Return the proxies yielded by ``iterelements()`` as a list.
        """
        return list(self.iterelements())

    def iterentities(self):
        u"""Iterate over the entity declarations among the children of
        the DTD, yielding one proxy object per declaration.

        Yields nothing if no DTD is set.
        """
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            # skip all children that are not entity declarations
            if c_node.type == tree.XML_ENTITY_DECL:
                node = _DTDEntityDecl()
                node._dtd = self
                node._c_node = <tree.xmlEntity*>c_node
                yield node
            c_node = c_node.next

    def entities(self):
        u"""Return the proxies yielded by ``iterentities()`` as a list.
        """
        return list(self.iterentities())

    def __dealloc__(self):
        # release the C level DTD together with this object
        tree.xmlFreeDtd(self._c_dtd)

    def __call__(self, etree):
        u"""__call__(self, etree)

        Validate doc using the DTD.

        Returns true if the document is valid, false if not.

        Raises ``DTDError`` if no validation context can be created and
        ``DTDValidateError`` if validation did not produce a result.
        """
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef dtdvalid.xmlValidCtxt* valid_ctxt
        # -1 means "no result"; it is reported as an internal error below
        cdef int ret = -1

        assert self._c_dtd is not NULL, "DTD not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = dtdvalid.xmlNewValidCtxt()
        if valid_ctxt is NULL:
            raise DTDError(u"Failed to create validation context")

        # work around error reporting bug in libxml2 <= 2.9.1 (and later?)
        # https://bugzilla.gnome.org/show_bug.cgi?id=724903
        valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc
        valid_ctxt.userData = NULL

        try:
            with self._error_log:
                # validate a temporary document built for root_node and
                # dispose of it again right after validation
                c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
                ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
                _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            # the validation context is always freed, even on errors
            dtdvalid.xmlFreeValidCtxt(valid_ctxt)

        if ret == -1:
            raise DTDValidateError(u"Internal error in DTD validation",
                                   self._error_log)
        return ret == 1


cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
    """
    Parse a DTD from a file-like object and return the resulting C DTD.

    An exception stored in the exception context while reading is
    re-raised.  Raises DTDParseError if no DTD was produced.
    """
    cdef _ExceptionContext read_errors = _ExceptionContext()
    cdef _FileReaderContext reader = _FileReaderContext(file, read_errors, None)
    cdef _ErrorLog parse_log = _ErrorLog()
    cdef tree.xmlDtd* c_result

    # collect parser messages in a local log while reading
    with parse_log:
        c_result = reader._readDtd()

    # errors stored during reading take precedence over the parse error
    read_errors._raise_if_stored()
    if c_result is not NULL:
        return c_result
    raise DTDParseError(u"error parsing DTD", parse_log)

cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
    """
    Create a DTD object that wraps a copy of the given C DTD.

    Returns None if ``c_dtd`` is NULL.
    """
    cdef DTD validator
    if c_dtd is not NULL:
        # do not run through DTD.__init__()!
        validator = DTD.__new__(DTD)
        validator._c_dtd = _copyDtd(c_dtd)
        _Validator.__init__(validator)
        return validator
    return None


cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL:
    """
    Return a copy of the given DTD.

    libxml2 (currently) does not set up the element->attributes links
    when it copies a DTD, so they are rebuilt here for every attribute
    declaration found in the copy.

    Raises MemoryError if the copy could not be created.
    """
    cdef tree.xmlDtd* c_copy = tree.xmlCopyDtd(c_orig_dtd)
    cdef tree.xmlNode* c_child
    if c_copy is NULL:
        raise MemoryError
    c_child = c_copy.children
    while c_child is not NULL:
        if c_child.type == tree.XML_ATTRIBUTE_DECL:
            _linkDtdAttribute(c_copy, <tree.xmlAttribute*>c_child)
        c_child = c_child.next
    return c_copy


cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr):
    """
    Create the link to the DTD attribute declaration from the corresponding
    element declaration.

    The attribute is inserted into the ``nexth`` chain that starts at the
    ``attributes`` field of the element declaration named by
    ``c_attr.elem``.  Namespace declarations are kept at the front of the
    chain, other attributes are appended at the end.  Nothing is changed
    if the element declaration cannot be found or if the search position
    reaches ``c_attr`` itself.
    """
    c_elem = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem)
    if not c_elem:
        # no such element? something is wrong with the DTD ...
        return
    c_pos = c_elem.attributes
    if not c_pos:
        # empty chain: the attribute becomes its only entry
        c_elem.attributes = c_attr
        c_attr.nexth = NULL
        return
    # libxml2 keeps namespace declarations first, and we need to make
    # sure we don't re-insert attributes that are already there
    if _isDtdNsDecl(c_attr):
        if not _isDtdNsDecl(c_pos):
            # chain has no namespace declarations yet: insert at the head
            c_elem.attributes = c_attr
            c_attr.nexth = c_pos
            return
        # advance to the last namespace declaration of the leading run,
        # stopping early if the attribute itself is found
        while c_pos != c_attr and c_pos.nexth and _isDtdNsDecl(c_pos.nexth):
            c_pos = c_pos.nexth
    else:
        # append at end
        while c_pos != c_attr and c_pos.nexth:
            c_pos = c_pos.nexth
    if c_pos == c_attr:
        # already linked at this position, nothing to do
        return
    # splice the attribute in right after c_pos
    c_attr.nexth = c_pos.nexth
    c_pos.nexth = c_attr


cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr):
    """
    Tell whether an attribute declaration is a namespace declaration,
    i.e. whether its name or its prefix is "xmlns".
    """
    if cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0:
        return True
    # without a prefix, only the name test above could have matched
    if c_attr.prefix is NULL:
        return False
    return cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0