summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2012-12-21 22:48:02 +0100
committer Stefan Behnel <stefan_ml@behnel.de> 2012-12-21 22:48:02 +0100
commit c37f36f2843f0f9c1ef938920b47978d6e28ed4e (patch)
tree afea03f1df470ebb8cda16fc7728d4358e06101e /src
parent 4c6a7f4eba4208cbedc653e3277ebd7dc79b9614 (diff)
download python-lxml-c37f36f2843f0f9c1ef938920b47978d6e28ed4e.tar.gz
Add a compile-time option for Py2 that makes the API always return Unicode strings for names and text, instead of returning byte strings when the content is pure ASCII.
Diffstat (limited to 'src')
-rw-r--r-- src/lxml/apihelpers.pxi 6
-rw-r--r-- src/lxml/includes/etree_defs.h 9
-rw-r--r-- src/lxml/python.pxd 1
3 files changed, 13 insertions, 3 deletions
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 67f5ff48..18ebd134 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -1301,7 +1301,7 @@ cdef object funicode(const_xmlChar* s):
cdef Py_ssize_t slen
cdef const_xmlChar* spos
cdef bint is_non_ascii
- if python.IS_PYTHON3:
+ if python.LXML_UNICODE_STRINGS:
return s.decode('UTF-8')
spos = s
is_non_ascii = 0
@@ -1551,11 +1551,11 @@ cdef inline object _namespacedName(xmlNode* c_node):
cdef object _namespacedNameFromNsName(const_xmlChar* href, const_xmlChar* name):
if href is NULL:
return funicode(name)
- elif python.IS_PYTHON3:
+ elif python.LXML_UNICODE_STRINGS and python.PY_VERSION_HEX >= 0x02060000:
return python.PyUnicode_FromFormat("{%s}%s", href, name)
else:
s = python.PyBytes_FromFormat("{%s}%s", href, name)
- if isutf8(_xcstr(s)):
+ if python.LXML_UNICODE_STRINGS or isutf8(_xcstr(s)):
return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL)
else:
return s
diff --git a/src/lxml/includes/etree_defs.h b/src/lxml/includes/etree_defs.h
index 7440e427..8d4d963e 100644
--- a/src/lxml/includes/etree_defs.h
+++ b/src/lxml/includes/etree_defs.h
@@ -32,6 +32,15 @@
# define IS_PYTHON3 0
#endif
+#if IS_PYTHON3
+#undef LXML_UNICODE_STRINGS
+#define LXML_UNICODE_STRINGS 1
+#else
+#ifndef LXML_UNICODE_STRINGS
+#define LXML_UNICODE_STRINGS 0
+#endif
+#endif
+
#if !IS_PYPY
# define PyWeakref_LockObject(obj) (NULL)
#endif
diff --git a/src/lxml/python.pxd b/src/lxml/python.pxd
index 99e19ad9..980f6d0d 100644
--- a/src/lxml/python.pxd
+++ b/src/lxml/python.pxd
@@ -129,5 +129,6 @@ cdef extern from "etree_defs.h": # redefines some functions as macros
cdef bint _isString(object obj)
cdef const_char* _fqtypename(object t)
cdef object PY_NEW(object t)
+ cdef bint LXML_UNICODE_STRINGS
cdef bint IS_PYTHON3
cdef bint IS_PYPY