summary | refs | log | tree | commit | diff
path: root/src/syntax.c
diff options
context:
space:
mode:
author: Michal Nazarewicz <mina86@mina86.com> 2016-07-17 03:09:38 +0200
committer: Michal Nazarewicz <mina86@mina86.com> 2016-08-02 15:39:10 +0200
commit: 4538a5e37e8dacde4b3e828d832c4c558a146912 (patch)
tree: 43a158bf0635a01bf5946730ac439fd0b3b8f606 /src/syntax.c
parent: e7257061317c604492d20f26f312b9e925aa1860 (diff)
download: emacs-4538a5e37e8dacde4b3e828d832c4c558a146912.tar.gz
Refactor regex character class parsing in [:name:]
The re_wctype function is used in three separate places, and in all of those places almost identical code extracting the name from [:name:] surrounds it. Furthermore, re_wctype requires a NUL-terminated string, so the name of the character class is copied to a temporary buffer.

The code duplication and unnecessary memory copying can be avoided by pushing the responsibility of parsing the whole [:name:] sequence to the function.

Furthermore, since the function now has access to the length of the character class name (since it’s doing the parsing), it can take advantage of that information in skipping some string comparisons and using a constant-length memcmp instead of strcmp, which needs to take care of NUL bytes.

* src/regex.c (re_wctype): Delete function. Replace it with:
(re_wctype_parse): New function which parses a whole [:name:] string and returns a RECC_* constant or -1 if the string is not of [:name:] format.
(regex_compile): Use re_wctype_parse.
* src/syntax.c (skip_chars): Use re_wctype_parse.
Diffstat (limited to 'src/syntax.c')
-rw-r--r-- src/syntax.c 96
1 file changed, 26 insertions, 70 deletions
diff --git a/src/syntax.c b/src/syntax.c
index f8d987b377c..667de402ec4 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -1691,44 +1691,22 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
/* At first setup fastmap. */
while (i_byte < size_byte)
{
- c = str[i_byte++];
-
- if (handle_iso_classes && c == '['
- && i_byte < size_byte
- && str[i_byte] == ':')
+ if (handle_iso_classes)
{
- const unsigned char *class_beg = str + i_byte + 1;
- const unsigned char *class_end = class_beg;
- const unsigned char *class_limit = str + size_byte - 2;
- /* Leave room for the null. */
- unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
- re_wctype_t cc;
-
- if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
- class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
-
- while (class_end < class_limit
- && *class_end >= 'a' && *class_end <= 'z')
- class_end++;
-
- if (class_end == class_beg
- || *class_end != ':' || class_end[1] != ']')
- goto not_a_class_name;
-
- memcpy (class_name, class_beg, class_end - class_beg);
- class_name[class_end - class_beg] = 0;
-
- cc = re_wctype (class_name);
+ const unsigned char *ch = str + i_byte;
+ re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
if (cc == 0)
error ("Invalid ISO C character class");
-
- iso_classes = Fcons (make_number (cc), iso_classes);
-
- i_byte = class_end + 2 - str;
- continue;
+ if (cc != -1)
+ {
+ iso_classes = Fcons (make_number (cc), iso_classes);
+ i_byte = ch - str;
+ continue;
+ }
}
- not_a_class_name:
+ c = str[i_byte++];
+
if (c == '\\')
{
if (i_byte == size_byte)
@@ -1808,54 +1786,32 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
while (i_byte < size_byte)
{
int leading_code = str[i_byte];
- c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
- i_byte += len;
- if (handle_iso_classes && c == '['
- && i_byte < size_byte
- && STRING_CHAR (str + i_byte) == ':')
+ if (handle_iso_classes)
{
- const unsigned char *class_beg = str + i_byte + 1;
- const unsigned char *class_end = class_beg;
- const unsigned char *class_limit = str + size_byte - 2;
- /* Leave room for the null. */
- unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
- re_wctype_t cc;
-
- if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
- class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
-
- while (class_end < class_limit
- && *class_end >= 'a' && *class_end <= 'z')
- class_end++;
-
- if (class_end == class_beg
- || *class_end != ':' || class_end[1] != ']')
- goto not_a_class_name_multibyte;
-
- memcpy (class_name, class_beg, class_end - class_beg);
- class_name[class_end - class_beg] = 0;
-
- cc = re_wctype (class_name);
+ const unsigned char *ch = str + i_byte;
+ re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
if (cc == 0)
error ("Invalid ISO C character class");
-
- iso_classes = Fcons (make_number (cc), iso_classes);
-
- i_byte = class_end + 2 - str;
- continue;
+ if (cc != -1)
+ {
+ iso_classes = Fcons (make_number (cc), iso_classes);
+ i_byte = ch - str;
+ continue;
+ }
}
- not_a_class_name_multibyte:
- if (c == '\\')
+ if (leading_code== '\\')
{
- if (i_byte == size_byte)
+ if (++i_byte == size_byte)
break;
leading_code = str[i_byte];
- c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
- i_byte += len;
}
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
+ i_byte += len;
+
+
/* Treat `-' as range character only if another character
follows. */
if (i_byte + 1 < size_byte