summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Isaac Muse <isaacmuse@gmail.com> 2019-01-06 14:08:32 -0700
committer Isaac Muse <isaacmuse@gmail.com> 2019-01-06 14:08:32 -0700
commit de264e1294a5e2a9a00685e729f2eb0ec5129330 (patch)
tree 30ca6313e245fd6e30d601ecb218e2f84812d878
parent 48c7ad53dade2827e89984ff40561f2d70bb6e46 (diff)
download beautifulsoup4-de264e1294a5e2a9a00685e729f2eb0ec5129330.tar.gz
Don't track un-prefixed namespaces
-rw-r--r-- bs4/builder/_lxml.py 12
-rw-r--r-- bs4/tests/test_lxml.py 18
2 files changed, 24 insertions, 6 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 3a1700c..b7e172c 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -71,12 +71,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
This might be useful later on when creating CSS selectors.
"""
for key, value in mapping.items():
- if key not in self.soup._namespaces:
- # Let the BeautifulSoup object know about a new namespace.
- # If there are multiple namespaces defined with the same
- # prefix, the first one in the document takes precedence.
- self.soup._namespaces[key] = value
-
+ if key and key not in self.soup._namespaces:
+ # Let the BeautifulSoup object know about a new namespace.
+ # If there are multiple namespaces defined with the same
+ # prefix, the first one in the document takes precedence.
+ self.soup._namespaces[key] = value
+
def default_parser(self, encoding):
# This can either return a parser object or a class, which
# will be instantiated with default arguments.
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 8a8f690..1a4f27c 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -80,3 +80,21 @@ class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
@property
def default_builder(self):
return LXMLTreeBuilderForXML()
+
+ def test_namespace_indexing(self):
+ # We should not track un-prefixed namespaces as we can only hold one
+ # and it will be recognized as the default namespace by soupsieve,
+ # which may be confusing in some situations. When no namespace is provided
+ # for a selector, the default namespace (if defined) is assumed.
+
+ soup = self.soup(
+ '<?xml version="1.1"?>\n'
+ '<root>'
+ '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
+ '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>'
+ '</root>'
+ )
+ self.assertEqual(
+ soup._namespaces,
+ {'xml': 'http://www.w3.org/XML/1998/namespace', 'prefix': 'http://prefixed-namespace.com'}
+ )