summaryrefslogtreecommitdiff
path: root/crypto
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2007-05-14 21:35:25 +0000
committerAndy Polyakov <appro@openssl.org>2007-05-14 21:35:25 +0000
commitb2dba9bf1f8f73376b9c1f0904a86996c728b236 (patch)
tree8848a4f5efdf02d841b1ebea4969d879d6e61eb5 /crypto
parent932cc129ee61f5b72636eee6a7c3268e23967f7b (diff)
downloadopenssl-new-b2dba9bf1f8f73376b9c1f0904a86996c728b236.tar.gz
Profiling revealed that OPENSSL_cleanse consumes *more* CPU time than
sha1_block_data_order when hashing short messages. Move OPENSSL_cleanse to "cpuid" assembler module and gain 2x.
Diffstat (limited to 'crypto')
-rw-r--r--crypto/Makefile2
-rw-r--r--crypto/ia64cpuid.S36
-rw-r--r--crypto/mem.c10
-rw-r--r--crypto/sparccpuid.S48
-rw-r--r--crypto/x86_64cpuid.pl32
-rw-r--r--crypto/x86cpuid.pl31
6 files changed, 156 insertions, 3 deletions
diff --git a/crypto/Makefile b/crypto/Makefile
index efe6a79d87..1b8c7c2591 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -34,7 +34,7 @@ GENERAL=Makefile README crypto-lib.com install.com
LIB= $(TOP)/libcrypto.a
SHARED_LIB= libcrypto$(SHLIB_EXT)
LIBSRC= cryptlib.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c cpt_err.c ebcdic.c uid.c o_time.c o_str.c o_dir.c
-LIBOBJ= cryptlib.o mem.o mem_clr.o mem_dbg.o cversion.o ex_data.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o o_dir.o $(CPUID_OBJ)
+LIBOBJ= cryptlib.o mem.o mem_dbg.o cversion.o ex_data.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o o_dir.o $(CPUID_OBJ)
SRC= $(LIBSRC)
diff --git a/crypto/ia64cpuid.S b/crypto/ia64cpuid.S
index 5836565abb..818e2d1e1d 100644
--- a/crypto/ia64cpuid.S
+++ b/crypto/ia64cpuid.S
@@ -1,11 +1,13 @@
// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text
+
.global OPENSSL_cpuid_setup#
.proc OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib; br.ret.sptk.many b0 };;
.endp OPENSSL_cpuid_setup#
+
.global OPENSSL_rdtsc#
.proc OPENSSL_rdtsc#
OPENSSL_rdtsc:
@@ -124,3 +126,37 @@ OPENSSL_wipe_cpu:
mov ar.lc=r3
br.ret.sptk b0 };;
.endp OPENSSL_wipe_cpu#
+
+.global OPENSSL_cleanse#
+.proc OPENSSL_cleanse#
+OPENSSL_cleanse:
+{ .mib; and r2=7,r32
+ cmp.leu p6,p0=15,r33 // len>=15
+(p6) br.cond.dptk .Lot };;
+
+.Little:
+{ .mib; st1 [r32]=r0,1
+ cmp.ltu p6,p7=1,r33 } // len>1
+{ .mbb; add r33=-1,r33 // len--
+(p6) br.cond.dptk .Little
+(p7) br.ret.sptk.many b0 };;
+
+.Lot:
+{ .mib; cmp.eq p6,p0=0,r2
+(p6) br.cond.dptk .Laligned };;
+{ .mmi; st1 [r32]=r0,1;;
+ and r2=7,r32 }
+{ .mib; add r33=-1,r33
+ br .Lot };;
+
+.Laligned:
+{ .mmi; st8 [r32]=r0,8
+ and r2=-8,r33 // len&~7
+ add r33=-8,r33 };; // len-=8
+{ .mib; cmp.ltu p6,p0=8,r2 // ((len+8)&~7)>8
+(p6) br.cond.dptk .Laligned };;
+
+{ .mbb; cmp.eq p6,p7=r0,r33
+(p7) br.cond.dpnt .Little
+(p6) br.ret.sptk.many b0 };;
+.endp OPENSSL_cleanse#
diff --git a/crypto/mem.c b/crypto/mem.c
index 6635167228..43d48ab425 100644
--- a/crypto/mem.c
+++ b/crypto/mem.c
@@ -250,7 +250,6 @@ void CRYPTO_get_mem_debug_functions(void (**m)(void *,int,const char *,int,int),
void *CRYPTO_malloc_locked(int num, const char *file, int line)
{
void *ret = NULL;
- extern unsigned char cleanse_ctr;
if (num <= 0) return NULL;
@@ -267,11 +266,15 @@ void *CRYPTO_malloc_locked(int num, const char *file, int line)
if (malloc_debug_func != NULL)
malloc_debug_func(ret, num, file, line, 1);
+#ifndef OPENSSL_CPUID_OBJ
/* Create a dependency on the value of 'cleanse_ctr' so our memory
* sanitisation function can't be optimised out. NB: We only do
* this for >2Kb so the overhead doesn't bother us. */
if(ret && (num > 2048))
+ { extern unsigned char cleanse_ctr;
((unsigned char *)ret)[0] = cleanse_ctr;
+ }
+#endif
return ret;
}
@@ -291,7 +294,6 @@ void CRYPTO_free_locked(void *str)
void *CRYPTO_malloc(int num, const char *file, int line)
{
void *ret = NULL;
- extern unsigned char cleanse_ctr;
if (num <= 0) return NULL;
@@ -308,11 +310,15 @@ void *CRYPTO_malloc(int num, const char *file, int line)
if (malloc_debug_func != NULL)
malloc_debug_func(ret, num, file, line, 1);
+#ifndef OPENSSL_CPUID_OBJ
/* Create a dependency on the value of 'cleanse_ctr' so our memory
* sanitisation function can't be optimised out. NB: We only do
* this for >2Kb so the overhead doesn't bother us. */
if(ret && (num > 2048))
+ { extern unsigned char cleanse_ctr;
((unsigned char *)ret)[0] = cleanse_ctr;
+ }
+#endif
return ret;
}
diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S
index 52308abca6..f691abc57f 100644
--- a/crypto/sparccpuid.S
+++ b/crypto/sparccpuid.S
@@ -232,6 +232,54 @@ _sparcv9_rdtick:
.type _sparcv9_rdtick,#function
.size _sparcv9_rdtick,.-_sparcv9_rdtick
+.global OPENSSL_cleanse
+.align 32
+OPENSSL_cleanse:
+ cmp %o1,6
+ nop
+#ifdef ABI64
+ bgu %xcc,.Lot
+#else
+ bgu .Lot
+#endif
+ nop
+
+.Little:
+ stb %g0,[%o0]
+ subcc %o1,1,%o1
+ bnz .Little
+ add %o0,1,%o0
+ retl
+ nop
+.align 32
+.Lot:
+ andcc %o0,3,%g0
+ bz .Laligned
+ nop
+ stb %g0,[%o0]
+ sub %o1,1,%o1
+ ba .Lot
+ add %o0,1,%o0
+ nop
+.Laligned:
+ st %g0,[%o0]
+ sub %o1,4,%o1
+ andcc %o1,-4,%g0
+#ifdef ABI64
+ bnz %xcc,.Laligned
+#else
+ bnz .Laligned
+#endif
+ add %o0,4,%o0
+
+ cmp %o1,0
+ bne .Little
+ nop
+ retl
+ nop
+.type OPENSSL_cleanse,#function
+.size OPENSSL_cleanse,.-OPENSSL_cleanse
+
.section ".init",#alloc,#execinstr
call OPENSSL_cpuid_setup
nop
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index bc06e99cfb..2f657ca9d8 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -155,4 +155,36 @@ OPENSSL_ia32_cpuid:
or %rcx,%rax
ret
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
+
+.globl OPENSSL_cleanse
+.type OPENSSL_cleanse,\@function,2
+.align 16
+OPENSSL_cleanse:
+ xor %rax,%rax
+ cmp \$15,%rsi
+ jae .Lot
+.Little:
+ mov %al,(%rdi)
+ sub \$1,%rsi
+ lea 1(%rdi),%rdi
+ jnz .Little
+ ret
+.align 16
+.Lot:
+ test \$7,%rdi
+ jz .Laligned
+ mov %al,(%rdi)
+ lea -1(%rsi),%rsi
+ lea 1(%rdi),%rdi
+ jmp .Lot
+.Laligned:
+ mov %rax,(%rdi)
+ lea -8(%rsi),%rsi
+ test \$-8,%rsi
+ lea 8(%rdi),%rdi
+ jnz .Laligned
+ cmp \$0,%rsi
+ jne .Little
+ ret
+.size OPENSSL_cleanse,.-OPENSSL_cleanse
___
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index 7d924a60b7..13828d5633 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -216,6 +216,37 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
}
&function_end_B("OPENSSL_indirect_call");
+&function_begin_B("OPENSSL_cleanse");
+ &mov ("edx",&wparam(0));
+ &mov ("ecx",&wparam(1));
+ &xor ("eax","eax");
+ &cmp ("ecx",7);
+ &jae (&label("lot"));
+&set_label("little");
+ &mov (&BP(0,"edx"),"al");
+ &sub ("ecx",1);
+ &lea ("edx",&DWP(1,"edx"));
+ &jnz (&label("little"));
+ &ret ();
+
+&set_label("lot",16);
+ &test ("edx",3);
+ &jz (&label("aligned"));
+ &mov (&BP(0,"edx"),"al");
+ &lea ("ecx",&DWP(-1,"ecx"));
+ &lea ("edx",&DWP(1,"edx"));
+ &jmp (&label("lot"));
+&set_label("aligned");
+ &mov (&DWP(0,"edx"),"eax");
+ &lea ("ecx",&DWP(-4,"ecx"));
+ &test ("ecx",-4);
+ &lea ("edx",&DWP(4,"edx"));
+ &jnz (&label("aligned"));
+ &cmp ("ecx",0);
+ &jne (&label("little"));
+ &ret ();
+&function_end_B("OPENSSL_cleanse");
+
&initseg("OPENSSL_cpuid_setup");
&asm_finish();