summaryrefslogtreecommitdiff
path: root/src/fns.c
diff options
context:
space:
mode:
authorPaul Eggert <eggert@Penguin.CS.UCLA.EDU>2019-06-08 23:31:28 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2019-06-08 23:32:23 -0700
commit5abaea334cf4c0e004fca2b8b272e091eb5b5444 (patch)
tree315f3f1773ffaa11658115c5d9b920713079fd2a /src/fns.c
parent1043cd30acffcc0b61da4a80dcf3f8a5ac459267 (diff)
downloademacs-5abaea334cf4c0e004fca2b8b272e091eb5b5444.tar.gz
Tune base64 decoding
This improves performance of base64-decode-region by about 7.5% on my platform, and gets rid of some macros. * src/fns.c (IS_ASCII, IS_BASE64, IS_BASE64_IGNORABLE) (READ_QUADRUPLET_BYTE): Remove. (base64_value_to_char, base64_char_to_value): Now an array of two arrays. All uses changed. (base64url_value_to_char, base64url_char_to_value): Remove. All uses changed to the other array. (base64_char_to_value): Entries are now of type signed char, not short, since we can assume C99. Use C99 initializers; this is clearer and caters to the (theoretical) possibility of systems that do not use ASCII or do not have 8-bit bytes. Allow any index in the range 0..UCHAR_MAX instead of limiting it to 0..127, so that uses need not check for in-range indexes. Also record padding chars. All uses changed. (base64_decode_1): Always store number of chars in *NCHARS_RETURN, for simplicity. All callers changed. Speed up the byte-fetching.
Diffstat (limited to 'src/fns.c')
-rw-r--r--src/fns.c221
1 files changed, 124 insertions, 97 deletions
diff --git a/src/fns.c b/src/fns.c
index 3dad8f5f5fb..7d5443150d4 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -3186,33 +3186,11 @@ The data read from the system are decoded using `locale-coding-system'. */)
#define MIME_LINE_LENGTH 76
-#define IS_ASCII(Character) \
- ((Character) < 128)
-#define IS_BASE64(Character) \
- (IS_ASCII (Character) && b64_char_to_value[Character] >= 0)
-#define IS_BASE64_IGNORABLE(Character) \
- ((Character) == ' ' || (Character) == '\t' || (Character) == '\n' \
- || (Character) == '\f' || (Character) == '\r')
-
-/* Used by base64_decode_1 to retrieve a non-base64-ignorable
- character or return retval if there are no characters left to
- process. */
-#define READ_QUADRUPLET_BYTE(retval) \
- do \
- { \
- if (i == length) \
- { \
- if (nchars_return) \
- *nchars_return = nchars; \
- return (retval); \
- } \
- c = from[i++]; \
- } \
- while (IS_BASE64_IGNORABLE (c))
-
-/* Table of characters coding the 64 values. */
-static const char base64_value_to_char[64] =
+/* Tables of characters coding the 64 values. */
+static char const base64_value_to_char[2][64] =
{
+ /* base64 */
+ {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', /* 0- 9 */
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', /* 10-19 */
'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', /* 20-29 */
@@ -3220,10 +3198,9 @@ static const char base64_value_to_char[64] =
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', /* 40-49 */
'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', /* 50-59 */
'8', '9', '+', '/' /* 60-63 */
-};
-
-static const char base64url_value_to_char[64] =
-{
+ },
+ /* base64url */
+ {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', /* 0- 9 */
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', /* 10-19 */
'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', /* 20-29 */
@@ -3231,41 +3208,47 @@ static const char base64url_value_to_char[64] =
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', /* 40-49 */
'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', /* 50-59 */
'8', '9', '-', '_' /* 60-63 */
+ }
};
-/* Table of base64 values for first 128 characters. */
-static const short base64_char_to_value[128] =
-{
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */
- -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, /* 40- 49 */
- 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */
- -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */
- 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */
- 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, /* 90- 99 */
- 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */
- 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */
- 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */
-};
-
-static const short base64url_char_to_value[128] =
-{
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */
- -1, -1, -1, -1, -1, 62, -1, -1, 52, 53, /* 40- 49 */
- 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */
- -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */
- 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */
- 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, /* 90- 99 */
- 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */
- 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */
- 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */
+/* Tables of base64 values for bytes. -1 means ignorable, 0 invalid,
+ positive means 1 + the represented value. */
+static signed char const base64_char_to_value[2][UCHAR_MAX] =
+{
+ /* base64 */
+ {
+ ['\t']= -1, ['\n']= -1, ['\f']= -1, ['\r']= -1, [' '] = -1,
+ ['A'] = 1, ['B'] = 2, ['C'] = 3, ['D'] = 4, ['E'] = 5,
+ ['F'] = 6, ['G'] = 7, ['H'] = 8, ['I'] = 9, ['J'] = 10,
+ ['K'] = 11, ['L'] = 12, ['M'] = 13, ['N'] = 14, ['O'] = 15,
+ ['P'] = 16, ['Q'] = 17, ['R'] = 18, ['S'] = 19, ['T'] = 20,
+ ['U'] = 21, ['V'] = 22, ['W'] = 23, ['X'] = 24, ['Y'] = 25, ['Z'] = 26,
+ ['a'] = 27, ['b'] = 28, ['c'] = 29, ['d'] = 30, ['e'] = 31,
+ ['f'] = 32, ['g'] = 33, ['h'] = 34, ['i'] = 35, ['j'] = 36,
+ ['k'] = 37, ['l'] = 38, ['m'] = 39, ['n'] = 40, ['o'] = 41,
+ ['p'] = 42, ['q'] = 43, ['r'] = 44, ['s'] = 45, ['t'] = 46,
+ ['u'] = 47, ['v'] = 48, ['w'] = 49, ['x'] = 50, ['y'] = 51, ['z'] = 52,
+ ['0'] = 53, ['1'] = 54, ['2'] = 55, ['3'] = 56, ['4'] = 57,
+ ['5'] = 58, ['6'] = 59, ['7'] = 60, ['8'] = 61, ['9'] = 62,
+ ['+'] = 63, ['/'] = 64
+ },
+ /* base64url */
+ {
+ ['\t']= -1, ['\n']= -1, ['\f']= -1, ['\r']= -1, [' '] = -1,
+ ['A'] = 1, ['B'] = 2, ['C'] = 3, ['D'] = 4, ['E'] = 5,
+ ['F'] = 6, ['G'] = 7, ['H'] = 8, ['I'] = 9, ['J'] = 10,
+ ['K'] = 11, ['L'] = 12, ['M'] = 13, ['N'] = 14, ['O'] = 15,
+ ['P'] = 16, ['Q'] = 17, ['R'] = 18, ['S'] = 19, ['T'] = 20,
+ ['U'] = 21, ['V'] = 22, ['W'] = 23, ['X'] = 24, ['Y'] = 25, ['Z'] = 26,
+ ['a'] = 27, ['b'] = 28, ['c'] = 29, ['d'] = 30, ['e'] = 31,
+ ['f'] = 32, ['g'] = 33, ['h'] = 34, ['i'] = 35, ['j'] = 36,
+ ['k'] = 37, ['l'] = 38, ['m'] = 39, ['n'] = 40, ['o'] = 41,
+ ['p'] = 42, ['q'] = 43, ['r'] = 44, ['s'] = 45, ['t'] = 46,
+ ['u'] = 47, ['v'] = 48, ['w'] = 49, ['x'] = 50, ['y'] = 51, ['z'] = 52,
+ ['0'] = 53, ['1'] = 54, ['2'] = 55, ['3'] = 56, ['4'] = 57,
+ ['5'] = 58, ['6'] = 59, ['7'] = 60, ['8'] = 61, ['9'] = 62,
+ ['-'] = 63, ['_'] = 64
+ }
};
/* The following diagram shows the logical steps by which three octets
@@ -3454,7 +3437,7 @@ base64_encode_1 (const char *from, char *to, ptrdiff_t length,
int c;
unsigned int value;
int bytes;
- char const *b64_value_to_char = (base64url) ? base64url_value_to_char : base64_value_to_char;
+ char const *b64_value_to_char = base64_value_to_char[base64url];
while (i < length)
{
@@ -3632,8 +3615,9 @@ the base 64 encoding, as defined in RFC 4648. */)
decoded = SAFE_ALLOCA (length);
/* The decoded result should be unibyte. */
+ ptrdiff_t decoded_chars;
decoded_length = base64_decode_1 (SSDATA (string), decoded, length,
- !NILP (base64url), 0, NULL);
+ !NILP (base64url), 0, &decoded_chars);
if (decoded_length > length)
emacs_abort ();
else if (decoded_length >= 0)
@@ -3650,41 +3634,60 @@ the base 64 encoding, as defined in RFC 4648. */)
/* Base64-decode the data at FROM of LENGTH bytes into TO. If
MULTIBYTE, the decoded result should be in multibyte
- form. If NCHARS_RETURN is not NULL, store the number of produced
- characters in *NCHARS_RETURN. */
+ form. Store the number of produced characters in *NCHARS_RETURN. */
static ptrdiff_t
base64_decode_1 (const char *from, char *to, ptrdiff_t length,
bool base64url,
bool multibyte, ptrdiff_t *nchars_return)
{
- ptrdiff_t i = 0; /* Used inside READ_QUADRUPLET_BYTE */
+ char const *f = from;
+ char const *flim = from + length;
char *e = to;
- unsigned char c;
- unsigned long value;
ptrdiff_t nchars = 0;
- short const *b64_char_to_value = (base64url) ? base64url_char_to_value : base64_char_to_value;
+ signed char const *b64_char_to_value = base64_char_to_value[base64url];
+ unsigned char multibyte_bit = multibyte << 7;
- while (1)
+ while (true)
{
+ unsigned char c;
+ int v1;
+
/* Process first byte of a quadruplet. */
- READ_QUADRUPLET_BYTE (e-to);
+ do
+ {
+ if (f == flim)
+ {
+ *nchars_return = nchars;
+ return e - to;
+ }
+ c = *f++;
+ v1 = b64_char_to_value[c];
+ }
+ while (v1 < 0);
- if (!IS_BASE64 (c))
+ if (v1 == 0)
return -1;
- value = b64_char_to_value[c] << 18;
+ unsigned int value = (v1 - 1) << 18;
/* Process second byte of a quadruplet. */
- READ_QUADRUPLET_BYTE (-1);
+ do
+ {
+ if (f == flim)
+ return -1;
+ c = *f++;
+ v1 = b64_char_to_value[c];
+ }
+ while (v1 < 0);
- if (!IS_BASE64 (c))
+ if (v1 == 0)
return -1;
- value |= b64_char_to_value[c] << 12;
+ value += (v1 - 1) << 12;
- c = (unsigned char) (value >> 16);
- if (multibyte && c >= 128)
+ c = value >> 16 & 0xff;
+ if (c & multibyte_bit)
e += BYTE8_STRING (c, e);
else
*e++ = c;
@@ -3692,26 +3695,41 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length,
/* Process third byte of a quadruplet. */
- if (!base64url)
- READ_QUADRUPLET_BYTE (-1);
- else
- READ_QUADRUPLET_BYTE (e-to);
+ do
+ {
+ if (f == flim)
+ {
+ if (!base64url)
+ return -1;
+ *nchars_return = nchars;
+ return e - to;
+ }
+ c = *f++;
+ v1 = b64_char_to_value[c];
+ }
+ while (v1 < 0);
if (c == '=')
{
- READ_QUADRUPLET_BYTE (-1);
+ do
+ {
+ if (f == flim)
+ return -1;
+ c = *f++;
+ }
+ while (b64_char_to_value[c] < 0);
if (c != '=')
return -1;
continue;
}
- if (!IS_BASE64 (c))
+ if (v1 == 0)
return -1;
- value |= b64_char_to_value[c] << 6;
+ value += (v1 - 1) << 6;
- c = (unsigned char) (0xff & value >> 8);
- if (multibyte && c >= 128)
+ c = value >> 8 & 0xff;
+ if (c & multibyte_bit)
e += BYTE8_STRING (c, e);
else
*e++ = c;
@@ -3719,20 +3737,29 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length,
/* Process fourth byte of a quadruplet. */
- if (!base64url)
- READ_QUADRUPLET_BYTE (-1);
- else
- READ_QUADRUPLET_BYTE (e-to);
+ do
+ {
+ if (f == flim)
+ {
+ if (!base64url)
+ return -1;
+ *nchars_return = nchars;
+ return e - to;
+ }
+ c = *f++;
+ v1 = b64_char_to_value[c];
+ }
+ while (v1 < 0);
if (c == '=')
continue;
- if (!IS_BASE64 (c))
+ if (v1 < 0)
return -1;
- value |= b64_char_to_value[c];
+ value += v1 - 1;
- c = (unsigned char) (0xff & value);
- if (multibyte && c >= 128)
+ c = value & 0xff;
+ if (c & multibyte_bit)
e += BYTE8_STRING (c, e);
else
*e++ = c;