summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2015-04-15 23:45:08 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2015-04-15 23:47:01 -0700
commit 3074a9fad1c7c57948521125ee947bfa11ae185b (patch)
tree 9f13c9d28a6e54ea12c7096f0d34652ffce15f6e
parent 5161c9ca6a6107da30d411fb2ad72e01d08e5704 (diff)
download emacs-3074a9fad1c7c57948521125ee947bfa11ae185b.tar.gz
'[:graph:]' now excludes whitespace, not just ' '
* doc/lispref/searching.texi (Char Classes):
* lisp/emacs-lisp/rx.el (rx): Document [:graph:] to be [:print:]
sans whitespace (not sans space).
* src/character.c (graphicp): Exclude all Unicode whitespace chars,
not just space.
* src/regex.c (ISGRAPH): Exclude U+00A0 (NO-BREAK SPACE).
-rw-r--r-- doc/lispref/searching.texi 4
-rw-r--r-- lisp/emacs-lisp/rx.el 4
-rw-r--r-- src/character.c 25
-rw-r--r-- src/regex.c 2
4 files changed, 21 insertions, 14 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 10ea411d436..5a05c7c729d 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -558,7 +558,7 @@ This matches any @acronym{ASCII} control character.
This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
matches any digit, as well as @samp{+} and @samp{-}.
@item [:graph:]
-This matches graphic characters---everything except space,
+This matches graphic characters---everything except whitespace,
@acronym{ASCII} and non-@acronym{ASCII} control characters,
surrogates, and codepoints unassigned by Unicode, as indicated by the
Unicode @samp{general-category} property (@pxref{Character
@@ -572,7 +572,7 @@ This matches any multibyte character (@pxref{Text Representations}).
@item [:nonascii:]
This matches any non-@acronym{ASCII} character.
@item [:print:]
-This matches any printing character---either space, or a graphic
+This matches any printing character---either whitespace, or a graphic
character matched by @samp{[:graph:]}.
@item [:punct:]
This matches any punctuation character. (At present, for multibyte
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index ab9beb60928..520210614f5 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -965,12 +965,12 @@ CHAR
matches space and tab only.
`graphic', `graph'
- matches graphic characters--everything except space, ASCII
+ matches graphic characters--everything except whitespace, ASCII
and non-ASCII control characters, surrogates, and codepoints
unassigned by Unicode.
`printing', `print'
- matches space and graphic characters.
+ matches whitespace and graphic characters.
`alphanumeric', `alnum'
matches alphabetic characters and digits. (For multibyte characters,
diff --git a/src/character.c b/src/character.c
index ea98cf68e6c..c143c0f0e3e 100644
--- a/src/character.c
+++ b/src/character.c
@@ -984,8 +984,7 @@ character is not ASCII nor 8-bit character, an error is signaled. */)
#ifdef emacs
-/* Return 'true' if C is an alphabetic character as defined by its
- Unicode properties. */
+/* Return true if C is an alphabetic character. */
bool
alphabeticp (int c)
{
@@ -1008,8 +1007,7 @@ alphabeticp (int c)
|| gen_cat == UNICODE_CATEGORY_Nl);
}
-/* Return 'true' if C is an decimal-number character as defined by its
- Unicode properties. */
+/* Return true if C is a decimal-number character. */
bool
decimalnump (int c)
{
@@ -1022,16 +1020,25 @@ decimalnump (int c)
return gen_cat == UNICODE_CATEGORY_Nd;
}
-/* Return 'true' if C is a graphic character as defined by its
- Unicode properties. */
+/* Return true if C is a graphic character. */
bool
graphicp (int c)
{
- return c == ' ' || printablep (c);
+ Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
+ if (! INTEGERP (category))
+ return false;
+ EMACS_INT gen_cat = XINT (category);
+
+ /* See UTS #18. */
+ return (!(gen_cat == UNICODE_CATEGORY_Zs /* space separator */
+ || gen_cat == UNICODE_CATEGORY_Zl /* line separator */
+ || gen_cat == UNICODE_CATEGORY_Zp /* paragraph separator */
+ || gen_cat == UNICODE_CATEGORY_Cc /* control */
+ || gen_cat == UNICODE_CATEGORY_Cs /* surrogate */
+ || gen_cat == UNICODE_CATEGORY_Cn)); /* unassigned */
}
-/* Return 'true' if C is a printable character as defined by its
- Unicode properties. */
+/* Return true if C is a printable character. */
bool
printablep (int c)
{
diff --git a/src/regex.c b/src/regex.c
index 4af70c62cf5..38c5e350541 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -313,7 +313,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
/* The rest must handle multibyte characters. */
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
- ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
+ ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
: graphicp (c))
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \