summaryrefslogtreecommitdiff
path: root/Objects/unicodectype.c
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2008-06-11 18:37:52 +0000
committerGeorg Brandl <georg@python.org>2008-06-11 18:37:52 +0000
commit559e5d7f4d1155e95fb6f925c927a263f9196935 (patch)
tree4688423e81e9ffed7a5b2c87c50b55419e8e885a /Objects/unicodectype.c
parentea6d58d9d3033436b52e84960b9571525a4f5412 (diff)
downloadcpython-git-559e5d7f4d1155e95fb6f925c927a263f9196935.tar.gz
#2630: Implement PEP 3138.
The repr() of a string now contains printable Unicode characters unescaped. The new ascii() builtin can be used to get a repr() with only ASCII characters in it. PEP and patch were written by Atsuo Ishimoto.
Diffstat (limited to 'Objects/unicodectype.c')
-rw-r--r--Objects/unicodectype.c21
1 files changed, 21 insertions, 0 deletions
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 911c53f65c..1a2bb69ea1 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -21,6 +21,7 @@
#define UPPER_MASK 0x80
#define XID_START_MASK 0x100
#define XID_CONTINUE_MASK 0x200
+#define NONPRINTABLE_MASK 0x400
typedef struct {
const Py_UNICODE upper;
@@ -675,6 +676,26 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
return _PyUnicode_ToNumeric(ch) != -1.0;
}
+/* Returns 1 if ch is printable, i.e. repr() may emit it unescaped, and
+   0 if ch is non-printable and has to be hex-escaped when repr()ed.
+   The result is 1 exactly when NONPRINTABLE_MASK is clear in the flags of
+   the type record that gettyperecord() returns for ch.
+   All characters except those characters defined in the Unicode character
+   database as following categories are considered printable.
+      * Cc (Other, Control)
+      * Cf (Other, Format)
+      * Cs (Other, Surrogate)
+      * Co (Other, Private Use)
+      * Cn (Other, Not Assigned)
+      * Zl (Separator, Line) ('\u2028', LINE SEPARATOR)
+      * Zp (Separator, Paragraph) ('\u2029', PARAGRAPH SEPARATOR)
+      * Zs (Separator, Space) other than ASCII space ('\x20').
+*/
+int _PyUnicode_IsPrintable(Py_UNICODE ch)
+{
+ const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
+
+ return (ctype->flags & NONPRINTABLE_MASK) == 0;
+}
+
#ifndef WANT_WCTYPE_FUNCTIONS
/* Returns 1 for Unicode characters having the bidirectional type