summary | refs | log | tree | commit | diff
path: root/src/character.h
diff options
context:
space:
mode:
author: Paul Eggert <eggert@cs.ucla.edu>, 2020-04-26 15:18:49 -0700
committer: Paul Eggert <eggert@cs.ucla.edu>, 2020-04-26 19:31:54 -0700
commit: ed2def7d5e423388ca75c6e10fd7b42e0c4789c7 (patch)
tree: a488de7c0a4729937cfa8fca01093433a609374f /src/character.h
parent: 895a18eafb84bca68045e552437dbb00a15a9f56 (diff)
download: emacs-ed2def7d5e423388ca75c6e10fd7b42e0c4789c7.tar.gz
Improve string_char_and_length speed
This tweak improved the CPU time performance of ‘make compile-always’ by about 1.7% on my platform.
* src/character.c (string_char): Remove; no longer used.
* src/character.h (string_char_and_length): Redo so that it needn’t call string_char. This helps the caller, which can now become a leaf function.
Diffstat (limited to 'src/character.h')
-rw-r--r-- src/character.h 47
1 files changed, 27 insertions, 20 deletions
diff --git a/src/character.h b/src/character.h
index 4887473b27e..d4d77504426 100644
--- a/src/character.h
+++ b/src/character.h
@@ -85,7 +85,6 @@ enum
};
extern int char_string (unsigned, unsigned char *);
-extern int string_char (const unsigned char *, int *);
/* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11
compilers and can be concatenated with ordinary string literals. */
@@ -371,33 +370,41 @@ raw_prev_char_len (unsigned char const *p)
INLINE int
string_char_and_length (unsigned char const *p, int *length)
{
- int c, len;
+ int c = p[0];
+ if (! (c & 0x80))
+ {
+ *length = 1;
+ return c;
+ }
+ eassume (0xC0 <= c);
- if (! (p[0] & 0x80))
+ int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80);
+ if (! (c & 0x20))
{
- len = 1;
- c = p[0];
+ *length = 2;
+ return d + (c < 0xC2 ? 0x3FFF80 : 0);
}
- else if (! (p[0] & 0x20))
+
+ d = (d << 6) + p[2] - ((0x20 << 12) + 0x80);
+ if (! (c & 0x10))
{
- len = 2;
- c = ((((p[0] & 0x1F) << 6)
- | (p[1] & 0x3F))
- + (p[0] < 0xC2 ? 0x3FFF80 : 0));
+ *length = 3;
+ eassume (MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR);
+ return d;
}
- else if (! (p[0] & 0x10))
+
+ d = (d << 6) + p[3] - ((0x10 << 18) + 0x80);
+ if (! (c & 0x08))
{
- len = 3;
- c = (((p[0] & 0x0F) << 12)
- | ((p[1] & 0x3F) << 6)
- | (p[2] & 0x3F));
+ *length = 4;
+ eassume (MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR);
+ return d;
}
- else
- c = string_char (p, &len);
- eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH);
- *length = len;
- return c;
+ d = (d << 6) + p[4] - ((0x08 << 24) + 0x80);
+ *length = 5;
+ eassume (MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR);
+ return d;
}
/* Return the character code of character whose multibyte form is at P. */