summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/character.c 17
-rw-r--r-- src/character.h 1
-rw-r--r-- src/regex.c 12
3 files changed, 26 insertions, 4 deletions
diff --git a/src/character.c b/src/character.c
index b594af040c1..bc99daf0df0 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1038,6 +1038,23 @@ printablep (int c)
|| gen_cat == UNICODE_CATEGORY_Cn)); /* unassigned */
}
+/* Return true if C is space, tab, or has Unicode general category Zs, i.e.
+ is a "blank" per http://www.unicode.org/reports/tr18/tr18-19.html#blank. */
+bool
+blankp (int c)
+{
+ /* Fast path: space and tab are accepted directly, without
+ consulting the category table. */
+ if (c == ' ' || c == '\t')
+ return true;
+
+ Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
+ if (! INTEGERP (category)) /* no integer category entry for C */
+ return false;
+
+ return XINT (category) == UNICODE_CATEGORY_Zs; /* separator, space */
+}
+
void
syms_of_character (void)
{
diff --git a/src/character.h b/src/character.h
index fc8a0dd74d2..62d252e91ba 100644
--- a/src/character.h
+++ b/src/character.h
@@ -680,6 +680,7 @@ extern bool alphabeticp (int);
extern bool alphanumericp (int);
extern bool graphicp (int);
extern bool printablep (int);
+extern bool blankp (int);
/* Return a translation table of id number ID. */
#define GET_TRANSLATION_TABLE(id) \
diff --git a/src/regex.c b/src/regex.c
index ae3fde80c9e..7e70c494f47 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -310,11 +310,12 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
|| ((c) >= 'a' && (c) <= 'f') \
|| ((c) >= 'A' && (c) <= 'F'))
-/* This is only used for single-byte characters. */
-# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-
/* The rest must handle multibyte characters. */
+# define ISBLANK(c) (IS_REAL_ASCII (c) \
+ ? ((c) == ' ' || (c) == '\t') \
+ : blankp (c)) /* NB: may evaluate C more than once. */
+
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
: graphicp (c))
@@ -1790,6 +1791,7 @@ struct range_table_work_area
#define BIT_ALNUM 0x80
#define BIT_GRAPH 0x100
#define BIT_PRINT 0x200
+#define BIT_BLANK 0x400
/* Set the bit for character C in a list. */
@@ -2066,8 +2068,9 @@ re_wctype_to_bit (re_wctype_t cc)
case RECC_SPACE: return BIT_SPACE;
case RECC_GRAPH: return BIT_GRAPH;
case RECC_PRINT: return BIT_PRINT;
+ case RECC_BLANK: return BIT_BLANK;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
- case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
+ case RECC_UNIBYTE: case RECC_ERROR: return 0;
default:
abort ();
}
@@ -4658,6 +4661,7 @@ execute_charset (const_re_char **pp, unsigned c, unsigned corig, bool unibyte)
(class_bits & BIT_ALNUM && ISALNUM (c)) ||
(class_bits & BIT_ALPHA && ISALPHA (c)) ||
(class_bits & BIT_SPACE && ISSPACE (c)) ||
+ (class_bits & BIT_BLANK && ISBLANK (c)) ||
(class_bits & BIT_WORD && ISWORD (c)) ||
((class_bits & BIT_UPPER) &&
(ISUPPER (c) || (corig != c &&