summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Heimes <christian@python.org>2018-09-23 09:50:25 +0200
committerMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2018-09-23 00:50:25 -0700
commit17b1d5d4e36aa57a9b25a0e694affbd1ee637e45 (patch)
tree486acd3328d5e607bd05936fdfb73eb548d4fa90
parent9fb051f032c36b9f6086b79086b4d6b7755a3d70 (diff)
downloadcpython-git-17b1d5d4e36aa57a9b25a0e694affbd1ee637e45.tar.gz
bpo-17239: Disable external entities in SAX parser (GH-9217)
The SAX parser no longer processes general external entities by default to increase security. Before, the parser created network connections to fetch remote files or loaded local files from the file system for DTD and entities. Signed-off-by: Christian Heimes <christian@python.org> https://bugs.python.org/issue17239
-rw-r--r--Doc/library/xml.dom.pulldom.rst14
-rw-r--r--Doc/library/xml.rst6
-rw-r--r--Doc/library/xml.sax.rst8
-rw-r--r--Doc/whatsnew/3.8.rst12
-rw-r--r--Lib/test/test_pulldom.py7
-rw-r--r--Lib/test/test_sax.py60
-rw-r--r--Lib/test/test_xml_etree.py13
-rw-r--r--Lib/xml/sax/expatreader.py2
-rw-r--r--Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst3
9 files changed, 120 insertions, 5 deletions
diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst
index 56f545c0e6..eb2b16bd6c 100644
--- a/Doc/library/xml.dom.pulldom.rst
+++ b/Doc/library/xml.dom.pulldom.rst
@@ -25,6 +25,20 @@ events until either processing is finished or an error condition occurs.
maliciously constructed data. If you need to parse untrusted or
unauthenticated data see :ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.8
+
+ The SAX parser no longer processes general external entities by default to
+ increase security by default. To enable processing of external entities,
+ pass a custom parser instance in::
+
+ from xml.dom.pulldom import parse
+ from xml.sax import make_parser
+ from xml.sax.handler import feature_external_ges
+
+ parser = make_parser()
+ parser.setFeature(feature_external_ges, True)
+ parse(filename, parser=parser)
+
Example::
diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst
index 63c24f80ac..9b8ba6b17c 100644
--- a/Doc/library/xml.rst
+++ b/Doc/library/xml.rst
@@ -65,8 +65,8 @@ kind sax etree minidom p
========================= ============== =============== ============== ============== ==============
billion laughs **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
quadratic blowup **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable** **Vulnerable**
-external entity expansion **Vulnerable** Safe (1) Safe (2) **Vulnerable** Safe (3)
-`DTD`_ retrieval **Vulnerable** Safe Safe **Vulnerable** Safe
+external entity expansion Safe (4) Safe (1) Safe (2) Safe (4) Safe (3)
+`DTD`_ retrieval Safe (4) Safe Safe Safe (4) Safe
decompression bomb Safe Safe Safe Safe **Vulnerable**
========================= ============== =============== ============== ============== ==============
@@ -75,6 +75,8 @@ decompression bomb Safe Safe Safe S
2. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns
the unexpanded entity verbatim.
3. :mod:`xmlrpclib` doesn't expand external entities and omits them.
+4. Since Python 3.8.0, external general entities are no longer processed by
+ default since Python.
billion laughs / exponential entity expansion
diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst
index 78d6633e09..aa3ea9bfc5 100644
--- a/Doc/library/xml.sax.rst
+++ b/Doc/library/xml.sax.rst
@@ -24,6 +24,14 @@ the SAX API.
constructed data. If you need to parse untrusted or unauthenticated data see
:ref:`xml-vulnerabilities`.
+.. versionchanged:: 3.8
+
+ The SAX parser no longer processes general external entities by default
+ to increase security. Before, the parser created network connections
+ to fetch remote files or loaded local files from the file
+ system for DTD and entities. The feature can be enabled again with method
+ :meth:`~xml.sax.xmlreader.XMLReader.setFeature` on the parser object
+ and argument :data:`~xml.sax.handler.feature_external_ges`.
The convenience functions are:
diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst
index 9aaaa761cc..e37a70f32d 100644
--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@@ -155,6 +155,15 @@ venv
activating virtual environments under PowerShell Core 6.1.
(Contributed by Brett Cannon in :issue:`32718`.)
+xml
+---
+
+* As mitigation against DTD and external entity retrieval, the
+ :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+ external entities by default.
+ (Contributed by Christian Heimes in :issue:`17239`.)
+
+
Optimizations
=============
@@ -333,6 +342,9 @@ Changes in the Python API
* :class:`uuid.UUID` now uses ``__slots__``, therefore instances can no longer
be weak-referenced and attributes can no longer be added.
+* :mod:`xml.dom.minidom` and mod:`xml.sax` modules no longer process
+ external entities by default.
+ (Contributed by Christian Heimes in :issue:`17239`.)
CPython bytecode changes
------------------------
diff --git a/Lib/test/test_pulldom.py b/Lib/test/test_pulldom.py
index f454098c65..4a1bad3442 100644
--- a/Lib/test/test_pulldom.py
+++ b/Lib/test/test_pulldom.py
@@ -3,6 +3,7 @@ import unittest
import xml.sax
from xml.sax.xmlreader import AttributesImpl
+from xml.sax.handler import feature_external_ges
from xml.dom import pulldom
from test.support import findfile
@@ -166,6 +167,12 @@ class PullDOMTestCase(unittest.TestCase):
# This should have returned 'END_ELEMENT'.
self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT)
+ def test_external_ges_default(self):
+ parser = pulldom.parseString(SMALL_SAMPLE)
+ saxparser = parser.parser
+ ges = saxparser.getFeature(feature_external_ges)
+ self.assertEqual(ges, False)
+
class ThoroughTestCase(unittest.TestCase):
"""Test the hard-to-reach parts of pulldom."""
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index 2eb62905ff..3044960a0e 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -13,13 +13,14 @@ except SAXReaderNotAvailable:
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
-from xml.sax.handler import feature_namespaces
+from xml.sax.handler import feature_namespaces, feature_external_ges
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from io import BytesIO, StringIO
import codecs
import gc
import os.path
import shutil
+from urllib.error import URLError
from test import support
from test.support import findfile, run_unittest, TESTFN
@@ -911,6 +912,18 @@ class ExpatReaderTest(XmlTestBase):
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
self._entities.append((name, publicId, systemId, ndata))
+
+ class TestEntityRecorder:
+ def __init__(self):
+ self.entities = []
+
+ def resolveEntity(self, publicId, systemId):
+ self.entities.append((publicId, systemId))
+ source = InputSource()
+ source.setPublicId(publicId)
+ source.setSystemId(systemId)
+ return source
+
def test_expat_dtdhandler(self):
parser = create_parser()
handler = self.TestDTDHandler()
@@ -927,6 +940,32 @@ class ExpatReaderTest(XmlTestBase):
[("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])
+ def test_expat_external_dtd_enabled(self):
+ parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ with self.assertRaises(URLError):
+ parser.feed(
+ '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+ )
+ self.assertEqual(
+ resolver.entities, [(None, 'unsupported://non-existing')]
+ )
+
+ def test_expat_external_dtd_default(self):
+ parser = create_parser()
+ resolver = self.TestEntityRecorder()
+ parser.setEntityResolver(resolver)
+
+ parser.feed(
+ '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
+ )
+ parser.feed('<doc />')
+ parser.close()
+ self.assertEqual(resolver.entities, [])
+
# ===== EntityResolver support
class TestEntityResolver:
@@ -936,8 +975,9 @@ class ExpatReaderTest(XmlTestBase):
inpsrc.setByteStream(BytesIO(b"<entity/>"))
return inpsrc
- def test_expat_entityresolver(self):
+ def test_expat_entityresolver_enabled(self):
parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
parser.setEntityResolver(self.TestEntityResolver())
result = BytesIO()
parser.setContentHandler(XMLGenerator(result))
@@ -951,6 +991,22 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), start +
b"<doc><entity></entity></doc>")
+ def test_expat_entityresolver_default(self):
+ parser = create_parser()
+ self.assertEqual(parser.getFeature(feature_external_ges), False)
+ parser.setEntityResolver(self.TestEntityResolver())
+ result = BytesIO()
+ parser.setContentHandler(XMLGenerator(result))
+
+ parser.feed('<!DOCTYPE doc [\n')
+ parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
+ parser.feed(']>\n')
+ parser.feed('<doc>&test;</doc>')
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start +
+ b"<doc></doc>")
+
# ===== Attributes support
class AttrGatherer(ContentHandler):
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index a52529051b..ecb910f04f 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -91,6 +91,12 @@ ENTITY_XML = """\
<document>&entity;</document>
"""
+EXTERNAL_ENTITY_XML = """\
+<!DOCTYPE points [
+<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
+]>
+<document>&entity;</document>
+"""
def checkwarnings(*filters, quiet=False):
def decorator(test):
@@ -861,6 +867,13 @@ class ElementTreeTest(unittest.TestCase):
root = parser.close()
self.serialize_check(root, '<document>text</document>')
+ # 4) external (SYSTEM) entity
+
+ with self.assertRaises(ET.ParseError) as cm:
+ ET.XML(EXTERNAL_ENTITY_XML)
+ self.assertEqual(str(cm.exception),
+ 'undefined entity &entity;: line 4, column 10')
+
def test_namespace(self):
# Test namespace issues.
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index 421358fa5b..5066ffc2fa 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -95,7 +95,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._lex_handler_prop = None
self._parsing = 0
self._entity_stack = []
- self._external_ges = 1
+ self._external_ges = 0
self._interning = None
# XMLReader methods
diff --git a/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst
new file mode 100644
index 0000000000..8dd0fe8c1b
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2018-09-11-18-30-55.bpo-17239.kOpwK2.rst
@@ -0,0 +1,3 @@
+The xml.sax and xml.dom.minidom parsers no longer processes external
+entities by default. External DTD and ENTITY declarations no longer
+load files or create network connections.