diff options
| author | Stefan Behnel <scoder@users.berlios.de> | 2011-09-07 00:14:32 +0200 |
|---|---|---|
| committer | Stefan Behnel <scoder@users.berlios.de> | 2011-09-07 00:14:32 +0200 |
| commit | b9bc13fdb63e06b1b7eeb3bd1039064b4ffba9af (patch) | |
| tree | 0ff7ab0ee6463600160004fbbbad2af5cf998280 /src | |
| parent | 65c8a3ff4b039fdd49be98f651849bb01d60a0c5 (diff) | |
| download | python-lxml-b9bc13fdb63e06b1b7eeb3bd1039064b4ffba9af.tar.gz | |
support pi.get() and pi.attrib by parsing the text content for pseudo-attributes
Diffstat (limited to 'src')
| -rw-r--r-- | src/lxml/lxml.etree.pyx | 26 | ||||
| -rw-r--r-- | src/lxml/tests/test_etree.py | 22 | ||||
| -rw-r--r-- | src/lxml/xslt.pxi | 36 |
3 files changed, 57 insertions, 27 deletions
diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx index de97facf..8f5a1a51 100644 --- a/src/lxml/lxml.etree.pyx +++ b/src/lxml/lxml.etree.pyx @@ -1579,6 +1579,32 @@ cdef class _ProcessingInstruction(__ContentOnlyElement): else: return u"<?%s?>" % self.target + def get(self, key, default=None): + u"""get(self, key, default=None) + + Try to parse pseudo-attributes from the text content of the + processing instruction, search for one with the given key as + name and return its associated value. + + Note that this is only a convenience method for the most + common case that all text content is structured in + attribute-like name-value pairs with properly quoted values. + It is not guaranteed to work for all possible text content. + """ + return self.attrib.get(key, default) + + property attrib: + u"""Returns a dict containing all pseudo-attributes that can be + parsed from the text content of this processing instruction. + Note that modifying the dict currently has no effect on the + XML node, although this is not guaranteed to stay this way. 
+ """ + def __get__(self): + return { attr : (value1 or value2) + for attr, value1, value2 in _FIND_PI_ATTRIBUTES(u' ' + self.text) } + +cdef object _FIND_PI_ATTRIBUTES = re.compile(ur'\s+(\w+)\s*=\s*(?:\'([^\']*)\'|"([^"]*)")', re.U).findall + cdef class _Entity(__ContentOnlyElement): property tag: def __get__(self): diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index 9a7dd63c..093c0f11 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -449,6 +449,28 @@ class ETreeOnlyTestCase(HelperTestCase): self.assertEquals(root[0].target, "mypi") self.assertEquals(root[0].text, "my test ") + def test_pi_pseudo_attributes_get(self): + XML = self.etree.XML + root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>")) + self.assertEquals(root[0].target, "mypi") + self.assertEquals(root[0].get('my'), "1") + self.assertEquals(root[0].get('test'), " abc ") + self.assertEquals(root[0].get('quotes'), "' '") + self.assertEquals(root[0].get('only'), None) + self.assertEquals(root[0].get('names'), None) + self.assertEquals(root[0].get('nope'), None) + + def test_pi_pseudo_attributes_attrib(self): + XML = self.etree.XML + root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>")) + self.assertEquals(root[0].target, "mypi") + self.assertEquals(root[0].attrib['my'], "1") + self.assertEquals(root[0].attrib['test'], " abc ") + self.assertEquals(root[0].attrib['quotes'], "' '") + self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only') + self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names') + self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope') + def test_deepcopy_pi(self): # previously caused a crash ProcessingInstruction = self.etree.ProcessingInstruction diff --git a/src/lxml/xslt.pxi b/src/lxml/xslt.pxi index 6cb2b349..1773c552 100644 --- a/src/lxml/xslt.pxi +++ b/src/lxml/xslt.pxi @@ -801,20 +801,10 @@ xslt.exsltRegisterAll() 
################################################################################ # XSLT PI support -cdef object _FIND_PI_ATTRIBUTES -_FIND_PI_ATTRIBUTES = re.compile(ur'\s+(\w+)\s*=\s*["\']([^"\']+)["\']', re.U).findall - -cdef object _RE_PI_HREF -_RE_PI_HREF = re.compile(ur'\s+href\s*=\s*["\']([^"\']+)["\']') - -cdef object _FIND_PI_HREF -_FIND_PI_HREF = _RE_PI_HREF.findall - -cdef object _REPLACE_PI_HREF -_REPLACE_PI_HREF = _RE_PI_HREF.sub - -cdef XPath __findStylesheetByID -__findStylesheetByID = None +cdef object _RE_PI_HREF = re.compile(ur'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")') +cdef object _FIND_PI_HREF = _RE_PI_HREF.findall +cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub +cdef XPath __findStylesheetByID = None cdef _findStylesheetByID(_Document doc, id): global __findStylesheetByID @@ -843,10 +833,11 @@ cdef class _XSLTProcessingInstruction(PIBase): _assertValidNode(self) if self._c_node.content is NULL: raise ValueError, u"PI lacks content" - hrefs = _FIND_PI_HREF(u' ' + funicode(self._c_node.content)) + hrefs = _FIND_PI_HREF(u' ' + self._c_node.content.decode('UTF-8')) if len(hrefs) != 1: raise ValueError, u"malformed PI attributes" - href_utf = utf8(hrefs[0]) + hrefs = hrefs[0] + href_utf = utf8(hrefs[0] or hrefs[1]) c_href = _cstr(href_utf) if c_href[0] != c'#': @@ -881,7 +872,8 @@ cdef class _XSLTProcessingInstruction(PIBase): def set(self, key, value): u"""set(self, key, value) - Sets a pseudo attribute in the text of the processing instruction. + Supports setting the 'href' pseudo-attribute in the text of + the processing instruction. """ if key != u"href": raise AttributeError, \ @@ -897,13 +889,3 @@ cdef class _XSLTProcessingInstruction(PIBase): self.text = _REPLACE_PI_HREF(attrib, text) else: self.text = text + attrib - - def get(self, key, default=None): - u"""get(self, key, default=None) - - Parses a pseudo attribute from the text of the processing instruction. 
- """ - for attr, value in _FIND_PI_ATTRIBUTES(u' ' + self.text): - if attr == key: - return value - return default |
