summary | refs | log | tree | commit | diff
path: root/src/lxml/serializer.pxi
diff options
context:
space:
mode:
author: Burak Arslan <burak.arslan@arskom.com.tr> 2017-02-06 13:40:24 +0300
committer: Burak Arslan <burak.arslan@arskom.com.tr> 2017-02-06 13:40:24 +0300
commit: 83470a34934a231471353df42b5e136509c8e40d (patch)
tree: 50525fafa1f38d6bc1ca552207b739161bd20cd5 /src/lxml/serializer.pxi
parent: 0bfc0d9903258fcc9311652d26a22638eaa5ee7e (diff)
parent: d64c789ab2bbdf8ccd63fa819e5c23597d4b7bd0 (diff)
download: python-lxml-83470a34934a231471353df42b5e136509c8e40d.tar.gz
Merge remote-tracking branch 'lxml/master' into method-ctxmanager
# Conflicts: # src/lxml/serializer.pxi
Diffstat (limited to 'src/lxml/serializer.pxi')
-rw-r--r-- src/lxml/serializer.pxi 217
1 files changed, 209 insertions, 8 deletions
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi
index 4b264a50..4ef53bc9 100644
--- a/src/lxml/serializer.pxi
+++ b/src/lxml/serializer.pxi
@@ -399,6 +399,208 @@ cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
pretty_print, encoding)
c_sibling = c_sibling.next
+
+# copied and adapted from libxml2
+#
+# Writes the hexadecimal character reference "&#xH...;" for 'val' into
+# 'out', followed by a terminating NUL byte.
+#
+# out: destination buffer.  Space is reserved for at most six hex digits,
+#      so the buffer must hold 3 ("&#x") + 6 + 1 (";") + 1 (NUL) = 11 bytes.
+# val: the code point to serialise.  Values above 0xFFFFFF need more than
+#      six digits and are not supported; callers are expected to pass
+#      validated code points only.
+#
+# Returns a pointer to the terminating NUL byte, i.e. the number of bytes
+# written (excluding the NUL) is 'result - out'.
+cdef unsigned char *xmlSerializeHexCharRef(unsigned char *out, int val):
+    cdef const char *hex_digits = "0123456789ABCDEF"
+    cdef unsigned char *ptr
+
+    out[0] = '&'
+    out[1] = '#'
+    out[2] = 'x'
+    out += 3
+
+    # Digits are produced least-significant first, so start at the slot of
+    # the last digit and walk backwards.
+    if val < 0x10:
+        ptr = out
+    elif val < 0x100:
+        ptr = out + 1
+    elif val < 0x1000:
+        ptr = out + 2
+    elif val < 0x10000:
+        ptr = out + 3
+    elif val < 0x100000:
+        ptr = out + 4
+    else:
+        ptr = out + 5
+
+    # First byte after the digits; the ';' goes here.
+    out = ptr + 1
+
+    if val <= 0:
+        # The loop below would not run at all and leave the reserved digit
+        # slot uninitialised.  Emit a well-defined "0" instead.
+        ptr[0] = '0'
+
+    while val > 0:
+        ptr[0] = <unsigned char> hex_digits[val & 0xF]
+        ptr -= 1
+        val >>= 4
+
+    out[0] = ';'
+    out += 1
+    out[0] = 0
+
+    return out
+
+
+# copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent())
+#
+# Writes the NUL-terminated byte string 'string' to 'buf' as the content
+# of a double-quoted attribute value:
+#
+# - newline, carriage return and tab become "&#10;", "&#13;" and "&#9;"
+# - '"', '<', '>' and '&' become "&quot;", "&lt;", "&gt;" and "&amp;"
+# - multi-byte sequences (decoded as UTF-8) become hex character references
+# - everything else is copied through unchanged, in runs as long as possible
+#
+# Does nothing if 'string' is NULL.
+#
+# Raises ValueError for a byte >= 0x80 that does not start a decodable
+# multi-byte sequence, or for a decoded value rejected by xmlIsCharQ().
+cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
+    cdef const char *base
+    cdef const char *cur
+    cdef const unsigned char *ucur
+    cdef const char *entity
+    cdef int entity_len
+
+    cdef unsigned char tmp[12]
+    cdef unsigned char *tmp_end
+    cdef int val = 0
+    cdef int l
+
+    if string == NULL:
+        return
+
+    # 'base' marks the start of the pending run of bytes that need no
+    # escaping; 'cur' is the byte currently being inspected.
+    base = cur = <const char*>string
+    while cur[0] != 0:
+        # Plain 'char' may be signed, in which case bytes >= 0x80 read as
+        # negative numbers and every comparison against 0x80/0xC0/... goes
+        # wrong.  Inspect multi-byte sequences through an unsigned view.
+        ucur = <const unsigned char*> cur
+
+        entity = NULL
+        entity_len = 0
+        if cur[0] == '\n':
+            entity = "&#10;"
+            entity_len = 5
+        elif cur[0] == '\r':
+            entity = "&#13;"
+            entity_len = 5
+        elif cur[0] == '\t':
+            entity = "&#9;"
+            entity_len = 4
+        elif cur[0] == '"':
+            entity = "&quot;"
+            entity_len = 6
+        elif cur[0] == '<':
+            entity = "&lt;"
+            entity_len = 4
+        elif cur[0] == '>':
+            entity = "&gt;"
+            entity_len = 4
+        elif cur[0] == '&':
+            entity = "&amp;"
+            entity_len = 5
+
+        if entity != NULL:
+            # Flush the pending unescaped run, then write the replacement.
+            if base != cur:
+                tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+            tree.xmlOutputBufferWrite(buf, entity_len, entity)
+            cur += 1
+            base = cur
+
+        elif (ucur[0] >= 0x80) and (ucur[1] != 0):
+
+            if base != cur:
+                tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+            # Each continuation byte is only read after the preceding one
+            # was found to be non-NUL, so this never reads past the
+            # terminator.
+            if ucur[0] < 0xC0:
+                # invalid UTF-8 sequence
+                val = ucur[0]
+                l = 1
+
+            elif ucur[0] < 0xE0:
+                val = ucur[0] & 0x1F
+                val <<= 6
+                val |= ucur[1] & 0x3F
+                l = 2
+
+            elif (ucur[0] < 0xF0) and (ucur[2] != 0):
+                val = ucur[0] & 0x0F
+                val <<= 6
+                val |= ucur[1] & 0x3F
+                val <<= 6
+                val |= ucur[2] & 0x3F
+                l = 3
+
+            elif (ucur[0] < 0xF8) and (ucur[2] != 0) and (ucur[3] != 0):
+                val = ucur[0] & 0x07
+                val <<= 6
+                val |= ucur[1] & 0x3F
+                val <<= 6
+                val |= ucur[2] & 0x3F
+                val <<= 6
+                val |= ucur[3] & 0x3F
+                l = 4
+            else:
+                # invalid UTF-8 sequence
+                val = ucur[0]
+                l = 1
+
+            if (l == 1) or (not tree.xmlIsCharQ(val)):
+                raise ValueError("Invalid character: %X" % val)
+
+            # We could do multiple things here. Just save
+            # as a char ref
+            #
+            # The helper returns a pointer to the terminating NUL, which
+            # gives the exact length.  Passing a negative length instead
+            # makes xmlOutputBufferWrite() write nothing at all.
+            tmp_end = xmlSerializeHexCharRef(tmp, val)
+            tree.xmlOutputBufferWrite(
+                buf, <int>(tmp_end - &tmp[0]), <const char*> tmp)
+            cur += l
+            base = cur
+
+        else:
+            # Ordinary byte (or a lone trailing byte >= 0x80): keep it in
+            # the pending run.
+            cur += 1
+
+    # Flush whatever unescaped run is left at the end of the string.
+    if base != cur:
+        tree.xmlOutputBufferWrite(buf, cur - base, base)
+
+
############################################################
# output to file-like objects
@@ -897,7 +1099,8 @@ cdef class _IncrementalFileWriter:
tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
self._write_qname(name, prefix)
tree.xmlOutputBufferWrite(self._c_out, 2, '="')
- tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(value), NULL)
+ _write_attr_string(self._c_out, _cstr(value))
+
tree.xmlOutputBufferWrite(self._c_out, 1, '"')
cdef _write_end_element(self, element_config):
@@ -969,14 +1172,12 @@ cdef class _IncrementalFileWriter:
raise LxmlSyntaxError("not in an element")
content = _utf8(content)
- if len(self._element_stack) > 0:
- ns, name, _, _ = self._element_stack[-1]
- else:
- ns, name = None, None
+ ns, name, _, _ = self._element_stack[-1]
+ if (c_method == OUTPUT_METHOD_HTML and
+ ns in (None, b'http://www.w3.org/1999/xhtml') and
+ name in (b'script', b'style')):
+ tree.xmlOutputBufferWrite(self._c_out, len(content), _cstr(content))
- if c_method == OUTPUT_METHOD_HTML and \
- ns in (None, 'http://www.w3.org/1999/xhtml') and name in ('script', 'style'):
- tree.xmlOutputBufferWrite(self._c_out, len(content), content)
else:
tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(content), NULL)