summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorylavic <ylavic@13f79535-47bb-0310-9956-ffa450edef68>2022-01-15 23:22:33 +0000
committerylavic <ylavic@13f79535-47bb-0310-9956-ffa450edef68>2022-01-15 23:22:33 +0000
commita5e5af946ca447b7dfe6062ea2fb8f9de4159ff5 (patch)
tree397a7a0aab805dd71c2684c71a74c994add3af71
parent17ab5e899488635e9564e6e1f332ea03fba11243 (diff)
downloadlibapr-a5e5af946ca447b7dfe6062ea2fb8f9de4159ff5.tar.gz
apr_cstr: Improve apr_cstr_casecmp() and apr_cstr_casecmpn() performance.
The new versions [1] compile to shorter/faster assembly than the previous ones [2], with no functional change. [1] apr_cstr_casecmp() after this commit: Dump of assembler code for function apr_cstr_casecmp: 0x0000000000049fc0 <+0>: movzbl (%rdi),%eax 0x0000000000049fc3 <+3>: movzbl (%rsi),%edx 0x0000000000049fc6 <+6>: lea 0x3d573(%rip),%r8 # 0x87540 <ucharmap> 0x0000000000049fcd <+13>: movzbl (%r8,%rax,1),%eax 0x0000000000049fd2 <+18>: movzbl (%r8,%rdx,1),%ecx 0x0000000000049fd7 <+23>: cmp %ecx,%eax 0x0000000000049fd9 <+25>: jne 0x49ffe <apr_cstr_casecmp+62> 0x0000000000049fdb <+27>: xor %edx,%edx 0x0000000000049fdd <+29>: jmp 0x49ffa <apr_cstr_casecmp+58> 0x0000000000049fdf <+31>: nop 0x0000000000049fe0 <+32>: add $0x1,%rdx 0x0000000000049fe4 <+36>: movzbl (%rdi,%rdx,1),%eax 0x0000000000049fe8 <+40>: movzbl (%rsi,%rdx,1),%ecx 0x0000000000049fec <+44>: movzbl (%r8,%rax,1),%eax 0x0000000000049ff1 <+49>: movzbl (%r8,%rcx,1),%ecx 0x0000000000049ff6 <+54>: cmp %ecx,%eax 0x0000000000049ff8 <+56>: jne 0x49ffe <apr_cstr_casecmp+62> 0x0000000000049ffa <+58>: test %eax,%eax 0x0000000000049ffc <+60>: jne 0x49fe0 <apr_cstr_casecmp+32> 0x0000000000049ffe <+62>: sub %ecx,%eax 0x000000000004a000 <+64>: ret End of assembler dump. 
[2] apr_cstr_casecmp() before this commit: Dump of assembler code for function apr_cstr_casecmp: 0x000000000004a000 <+0>: movzbl (%rdi),%eax 0x000000000004a003 <+3>: movzbl (%rsi),%edx 0x000000000004a006 <+6>: lea 0x3d533(%rip),%r8 # 0x87540 <ucharmap> 0x000000000004a00d <+13>: mov %rdi,%r9 0x000000000004a010 <+16>: mov %rax,%rcx 0x000000000004a013 <+19>: movswl (%r8,%rdx,2),%edx 0x000000000004a018 <+24>: movswl (%r8,%rax,2),%eax 0x000000000004a01d <+29>: sub %edx,%eax 0x000000000004a01f <+31>: jne 0x4a052 <apr_cstr_casecmp+82> 0x000000000004a021 <+33>: mov $0x1,%edx 0x000000000004a026 <+38>: test %ecx,%ecx 0x000000000004a028 <+40>: je 0x4a052 <apr_cstr_casecmp+82> 0x000000000004a02a <+42>: nopw 0x0(%rax,%rax,1) 0x000000000004a030 <+48>: movzbl (%r9,%rdx,1),%eax 0x000000000004a035 <+53>: movzbl (%rsi,%rdx,1),%ecx 0x000000000004a039 <+57>: add $0x1,%rdx 0x000000000004a03d <+61>: mov %rax,%rdi 0x000000000004a040 <+64>: movswl (%r8,%rcx,2),%ecx 0x000000000004a045 <+69>: movswl (%r8,%rax,2),%eax 0x000000000004a04a <+74>: sub %ecx,%eax 0x000000000004a04c <+76>: jne 0x4a052 <apr_cstr_casecmp+82> 0x000000000004a04e <+78>: test %edi,%edi 0x000000000004a050 <+80>: jne 0x4a030 <apr_cstr_casecmp+48> 0x000000000004a052 <+82>: ret End of assembler dump. Merge r1897102 from trunk. Submitted by: ylavic git-svn-id: https://svn.apache.org/repos/asf/apr/apr/branches/1.8.x@1897103 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--strings/apr_cstr.c42
1 files changed, 16 insertions, 26 deletions
diff --git a/strings/apr_cstr.c b/strings/apr_cstr.c
index 27229a067..c211c46dd 100644
--- a/strings/apr_cstr.c
+++ b/strings/apr_cstr.c
@@ -197,7 +197,7 @@ APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings,
* octets (such as extended latin alphabetics) are never case-folded.
* NOTE: Other than Alpha A-Z/a-z, each code point is unique!
*/
-static const short ucharmap[] = {
+static const unsigned char ucharmap[256] = {
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
@@ -244,7 +244,7 @@ static const short ucharmap[] = {
*
* NOTE: Other than Alpha A-Z/a-z, each code point is unique!
*/
-static const short ucharmap[] = {
+static const unsigned char ucharmap[256] = {
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
@@ -282,35 +282,25 @@ static const short ucharmap[] = {
APR_DECLARE(int) apr_cstr_casecmp(const char *s1, const char *s2)
{
- const unsigned char *str1 = (const unsigned char *)s1;
- const unsigned char *str2 = (const unsigned char *)s2;
- for (;;)
- {
- const int c1 = (int)(*str1);
- const int c2 = (int)(*str2);
- const int cmp = ucharmap[c1] - ucharmap[c2];
- /* Not necessary to test for !c2, this is caught by cmp */
- if (cmp || !c1)
- return cmp;
- str1++;
- str2++;
+ apr_size_t i = 0;
+ for (;; ++i) {
+ const int c1 = ucharmap[(unsigned char)s1[i]];
+ const int c2 = ucharmap[(unsigned char)s2[i]];
+ /* Not necessary to test for !c2, this is caught by c1 != c2 */
+ if (c1 != c2 || !c1)
+ return c1 - c2;
}
}
APR_DECLARE(int) apr_cstr_casecmpn(const char *s1, const char *s2, apr_size_t n)
{
- const unsigned char *str1 = (const unsigned char *)s1;
- const unsigned char *str2 = (const unsigned char *)s2;
- while (n--)
- {
- const int c1 = (int)(*str1);
- const int c2 = (int)(*str2);
- const int cmp = ucharmap[c1] - ucharmap[c2];
- /* Not necessary to test for !c2, this is caught by cmp */
- if (cmp || !c1)
- return cmp;
- str1++;
- str2++;
+ apr_size_t i = 0;
+ for (; i < n; ++i) {
+ const int c1 = ucharmap[(unsigned char)s1[i]];
+ const int c2 = ucharmap[(unsigned char)s2[i]];
+ /* Not necessary to test for !c2, this is caught by c1 != c2 */
+ if (c1 != c2 || !c1)
+ return c1 - c2;
}
return 0;
}