summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eli Zaretskii <eliz@gnu.org> 2006-06-09 18:22:30 +0000
committer Eli Zaretskii <eliz@gnu.org> 2006-06-09 18:22:30 +0000
commit 71b169b8c49d4c2f593b7074e8555f6e479b10f3 (patch)
tree 351837ea1b19a7f1b4a68cff90c9596cf983da0d
parent a9ab79a844b232ce7971c6234c86be3cc634a78e (diff)
download emacs-71b169b8c49d4c2f593b7074e8555f6e479b10f3.tar.gz
(read_escape): Provide a Unicode character escape syntax; \u followed by
exactly four or \U followed by exactly eight hex digits in a character or string literal is read as the Unicode character with that code point.
-rw-r--r-- src/lread.c 49
1 file changed, 49 insertions, 0 deletions
diff --git a/src/lread.c b/src/lread.c
index 31f974d9bc0..a0d4ad825dd 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -1764,6 +1764,9 @@ read_escape (readcharfun, stringp, byterep)
int *byterep;
{
register int c = READCHAR;
+ /* \u takes exactly four hex digits, \U exactly eight. Default to the
+ behaviour for \u, and change this value in the case that \U is seen. */
+ int unicode_hex_count = 4;
*byterep = 0;
@@ -1928,6 +1931,52 @@ read_escape (readcharfun, stringp, byterep)
return i;
}
+ case 'U':
+ /* Post-Unicode-2.0: Exactly eight hex digits. */
+ unicode_hex_count = 8;
+ case 'u':
+
+ /* A Unicode escape. We only permit them in strings and characters,
+ not arbitrarily in the source code, as in some other languages. */
+ {
+ int i = 0;
+ int count = 0;
+ Lisp_Object lisp_char;
+ struct gcpro gcpro1;
+
+ while (++count <= unicode_hex_count)
+ {
+ c = READCHAR;
+ /* isdigit(), isalpha() may be locale-specific, which we don't
+ want. */
+ if (c >= '0' && c <= '9') i = (i << 4) + (c - '0');
+ else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10;
+ else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10;
+ else
+ {
+ error ("Non-hex digit used for Unicode escape");
+ break;
+ }
+ }
+
+ GCPRO1 (readcharfun);
+ lisp_char = call2(intern("decode-char"), intern("ucs"),
+ make_number(i));
+ UNGCPRO;
+
+ if (EQ(Qnil, lisp_char))
+ {
+ /* This is ugly and horrible and trashes the user's data. */
+ XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201,
+ 34 + 128, 46 + 128));
+ return i;
+ }
+ else
+ {
+ return XFASTINT (lisp_char);
+ }
+ }
+
default:
if (BASE_LEADING_CODE_P (c))
c = read_multibyte (c, readcharfun);