summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorStefan Behnel <scoder@users.berlios.de>2011-09-07 00:14:32 +0200
committerStefan Behnel <scoder@users.berlios.de>2011-09-07 00:14:32 +0200
commitb9bc13fdb63e06b1b7eeb3bd1039064b4ffba9af (patch)
tree0ff7ab0ee6463600160004fbbbad2af5cf998280 /src
parent65c8a3ff4b039fdd49be98f651849bb01d60a0c5 (diff)
downloadpython-lxml-b9bc13fdb63e06b1b7eeb3bd1039064b4ffba9af.tar.gz
support pi.get() and pi.attrib by parsing the text content for pseudo-attributes
Diffstat (limited to 'src')
-rw-r--r--src/lxml/lxml.etree.pyx26
-rw-r--r--src/lxml/tests/test_etree.py22
-rw-r--r--src/lxml/xslt.pxi36
3 files changed, 57 insertions, 27 deletions
diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx
index de97facf..8f5a1a51 100644
--- a/src/lxml/lxml.etree.pyx
+++ b/src/lxml/lxml.etree.pyx
@@ -1579,6 +1579,32 @@ cdef class _ProcessingInstruction(__ContentOnlyElement):
else:
return u"<?%s?>" % self.target
+ def get(self, key, default=None):
+ u"""get(self, key, default=None)
+
+ Try to parse pseudo-attributes from the text content of the
+ processing instruction, search for one with the given key as
+ name and return its associated value.
+
+ Note that this is only a convenience method for the most
+ common case that all text content is structured in
+ attribute-like name-value pairs with properly quoted values.
+ It is not guaranteed to work for all possible text content.
+ """
+ return self.attrib.get(key, default)
+
+ property attrib:
+ u"""Returns a dict containing all pseudo-attributes that can be
+ parsed from the text content of this processing instruction.
+ Note that modifying the dict currently has no effect on the
+ XML node, although this is not guaranteed to stay this way.
+ """
+ def __get__(self):
+ return { attr : (value1 or value2)
+ for attr, value1, value2 in _FIND_PI_ATTRIBUTES(u' ' + self.text) }
+
+cdef object _FIND_PI_ATTRIBUTES = re.compile(ur'\s+(\w+)\s*=\s*(?:\'([^\']*)\'|"([^"]*)")', re.U).findall
+
cdef class _Entity(__ContentOnlyElement):
property tag:
def __get__(self):
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 9a7dd63c..093c0f11 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -449,6 +449,28 @@ class ETreeOnlyTestCase(HelperTestCase):
self.assertEquals(root[0].target, "mypi")
self.assertEquals(root[0].text, "my test ")
+ def test_pi_pseudo_attributes_get(self):
+ XML = self.etree.XML
+ root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
+ self.assertEquals(root[0].target, "mypi")
+ self.assertEquals(root[0].get('my'), "1")
+ self.assertEquals(root[0].get('test'), " abc ")
+ self.assertEquals(root[0].get('quotes'), "' '")
+ self.assertEquals(root[0].get('only'), None)
+ self.assertEquals(root[0].get('names'), None)
+ self.assertEquals(root[0].get('nope'), None)
+
+ def test_pi_pseudo_attributes_attrib(self):
+ XML = self.etree.XML
+ root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
+ self.assertEquals(root[0].target, "mypi")
+ self.assertEquals(root[0].attrib['my'], "1")
+ self.assertEquals(root[0].attrib['test'], " abc ")
+ self.assertEquals(root[0].attrib['quotes'], "' '")
+ self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
+ self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
+ self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
+
def test_deepcopy_pi(self):
# previously caused a crash
ProcessingInstruction = self.etree.ProcessingInstruction
diff --git a/src/lxml/xslt.pxi b/src/lxml/xslt.pxi
index 6cb2b349..1773c552 100644
--- a/src/lxml/xslt.pxi
+++ b/src/lxml/xslt.pxi
@@ -801,20 +801,10 @@ xslt.exsltRegisterAll()
################################################################################
# XSLT PI support
-cdef object _FIND_PI_ATTRIBUTES
-_FIND_PI_ATTRIBUTES = re.compile(ur'\s+(\w+)\s*=\s*["\']([^"\']+)["\']', re.U).findall
-
-cdef object _RE_PI_HREF
-_RE_PI_HREF = re.compile(ur'\s+href\s*=\s*["\']([^"\']+)["\']')
-
-cdef object _FIND_PI_HREF
-_FIND_PI_HREF = _RE_PI_HREF.findall
-
-cdef object _REPLACE_PI_HREF
-_REPLACE_PI_HREF = _RE_PI_HREF.sub
-
-cdef XPath __findStylesheetByID
-__findStylesheetByID = None
+cdef object _RE_PI_HREF = re.compile(ur'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")')
+cdef object _FIND_PI_HREF = _RE_PI_HREF.findall
+cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub
+cdef XPath __findStylesheetByID = None
cdef _findStylesheetByID(_Document doc, id):
global __findStylesheetByID
@@ -843,10 +833,11 @@ cdef class _XSLTProcessingInstruction(PIBase):
_assertValidNode(self)
if self._c_node.content is NULL:
raise ValueError, u"PI lacks content"
- hrefs = _FIND_PI_HREF(u' ' + funicode(self._c_node.content))
+ hrefs = _FIND_PI_HREF(u' ' + self._c_node.content.decode('UTF-8'))
if len(hrefs) != 1:
raise ValueError, u"malformed PI attributes"
- href_utf = utf8(hrefs[0])
+ hrefs = hrefs[0]
+ href_utf = utf8(hrefs[0] or hrefs[1])
c_href = _cstr(href_utf)
if c_href[0] != c'#':
@@ -881,7 +872,8 @@ cdef class _XSLTProcessingInstruction(PIBase):
def set(self, key, value):
u"""set(self, key, value)
- Sets a pseudo attribute in the text of the processing instruction.
+ Supports setting the 'href' pseudo-attribute in the text of
+ the processing instruction.
"""
if key != u"href":
raise AttributeError, \
@@ -897,13 +889,3 @@ cdef class _XSLTProcessingInstruction(PIBase):
self.text = _REPLACE_PI_HREF(attrib, text)
else:
self.text = text + attrib
-
- def get(self, key, default=None):
- u"""get(self, key, default=None)
-
- Parses a pseudo attribute from the text of the processing instruction.
- """
- for attr, value in _FIND_PI_ATTRIBUTES(u' ' + self.text):
- if attr == key:
- return value
- return default