diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2015-04-15 23:45:08 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2015-04-15 23:47:01 -0700 |
commit | 3074a9fad1c7c57948521125ee947bfa11ae185b (patch) | |
tree | 9f13c9d28a6e54ea12c7096f0d34652ffce15f6e | |
parent | 5161c9ca6a6107da30d411fb2ad72e01d08e5704 (diff) | |
download | emacs-3074a9fad1c7c57948521125ee947bfa11ae185b.tar.gz |
'[:graph:]' now excludes whitespace, not just ' '
* doc/lispref/searching.texi (Char Classes):
* lisp/emacs-lisp/rx.el (rx): Document [:graph:] to be [:print:]
sans whitespace (not sans space).
* src/character.c (graphicp): Exclude all Unicode whitespace chars,
not just space.
* src/regex.c (ISGRAPH): Exclude U+00A0 (NO-BREAK SPACE).
-rw-r--r-- | doc/lispref/searching.texi | 4 | ||||
-rw-r--r-- | lisp/emacs-lisp/rx.el | 4 | ||||
-rw-r--r-- | src/character.c | 25 | ||||
-rw-r--r-- | src/regex.c | 2 |
4 files changed, 21 insertions, 14 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 10ea411d436..5a05c7c729d 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -558,7 +558,7 @@ This matches any @acronym{ASCII} control character.
 This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
 matches any digit, as well as @samp{+} and @samp{-}.
 @item [:graph:]
-This matches graphic characters---everything except space,
+This matches graphic characters---everything except whitespace,
 @acronym{ASCII} and non-@acronym{ASCII} control characters,
 surrogates, and codepoints unassigned by Unicode, as indicated by the
 Unicode @samp{general-category} property (@pxref{Character
@@ -572,7 +572,7 @@ This matches any multibyte character (@pxref{Text Representations}).
 @item [:nonascii:]
 This matches any non-@acronym{ASCII} character.
 @item [:print:]
-This matches any printing character---either space, or a graphic
+This matches any printing character---either whitespace, or a graphic
 character matched by @samp{[:graph:]}.
 @item [:punct:]
 This matches any punctuation character. (At present, for multibyte
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index ab9beb60928..520210614f5 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -965,12 +965,12 @@ CHAR
      matches space and tab only.
 
 `graphic', `graph'
-     matches graphic characters--everything except space, ASCII
+     matches graphic characters--everything except whitespace, ASCII
      and non-ASCII control characters, surrogates, and codepoints
      unassigned by Unicode.
 
 `printing', `print'
-     matches space and graphic characters.
+     matches whitespace and graphic characters.
 
 `alphanumeric', `alnum'
      matches alphabetic characters and digits. (For multibyte characters,
diff --git a/src/character.c b/src/character.c
index ea98cf68e6c..c143c0f0e3e 100644
--- a/src/character.c
+++ b/src/character.c
@@ -984,8 +984,7 @@ character is not ASCII nor 8-bit character, an error is signaled. */)
 
 #ifdef emacs
 
-/* Return 'true' if C is an alphabetic character as defined by its
-   Unicode properties. */
+/* Return true if C is an alphabetic character. */
 bool
 alphabeticp (int c)
 {
@@ -1008,8 +1007,7 @@ alphabeticp (int c)
           || gen_cat == UNICODE_CATEGORY_Nl);
 }
 
-/* Return 'true' if C is an decimal-number character as defined by its
-   Unicode properties. */
+/* Return true if C is a decimal-number character. */
 bool
 decimalnump (int c)
 {
@@ -1022,16 +1020,25 @@ decimalnump (int c)
   return gen_cat == UNICODE_CATEGORY_Nd;
 }
 
-/* Return 'true' if C is a graphic character as defined by its
-   Unicode properties. */
+/* Return true if C is a graphic character. */
 bool
 graphicp (int c)
 {
-  return c == ' ' || printablep (c);
+  Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
+  if (! INTEGERP (category))
+    return false;
+  EMACS_INT gen_cat = XINT (category);
+
+  /* See UTS #18. */
+  return (!(gen_cat == UNICODE_CATEGORY_Zs /* space separator */
+            || gen_cat == UNICODE_CATEGORY_Zl /* line separator */
+            || gen_cat == UNICODE_CATEGORY_Zp /* paragraph separator */
+            || gen_cat == UNICODE_CATEGORY_Cc /* control */
+            || gen_cat == UNICODE_CATEGORY_Cs /* surrogate */
+            || gen_cat == UNICODE_CATEGORY_Cn)); /* unassigned */
 }
 
-/* Return 'true' if C is a printable character as defined by its
-   Unicode properties. */
+/* Return true if C is a printable character. */
 bool
 printablep (int c)
 {
diff --git a/src/regex.c b/src/regex.c
index 4af70c62cf5..38c5e350541 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -313,7 +313,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 
 /* The rest must handle multibyte characters. */
 
 # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
-                    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
+                    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
                     : graphicp (c))
 
 # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \