summary | refs | log | tree | commit | diff
path: root/sosemanuk.cpp
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2018-08-23 14:42:29 -0400
committer: GitHub <noreply@github.com> 2018-08-23 14:42:29 -0400
commit: afbd3e60f68ff8d9ae1b90d9c3c4eb576f518dbd (patch)
tree: 99d43bac5e27abb44453e699ae308407f25fba3e /sosemanuk.cpp
parent: e054d36dc88d004efb16f6afe1234b4ea94f995c (diff)
download: cryptopp-git-afbd3e60f68ff8d9ae1b90d9c3c4eb576f518dbd.tar.gz
Fix alignment on Win32 and Solaris Sparc (PR #709)
These fixes were interesting in a morbid sort of way. I thought the FixedSizeAllocatorWithCleanup specializations faithfully reproduced the original semantics, but I was wrong on Win32 and Sparc. Also see commit e054d36dc88d. It seems there was another requirement or dependency that we missed, but it was not readily apparent. If I am parsing the results correctly (which I may not be), it appears the bit twiddling using 8-byte alignment had more influence on alignment than I originally thought, based on the use of CRYPTOPP_BOOL_ALIGN16 and T_Align16. Or maybe the alignment attributes specified by CRYPTOPP_ALIGN_DATA are not being honored like they should be for stack allocations. This check-in avoids some uses of x86 movdqa (aligned) in favor of movdqu (unaligned). The uses were concentrated on memory operands which were 8-byte aligned instead of 16-byte aligned. It is not clear to me how the specializations lost 8 bytes of alignment. The check-in also enlists CRYPTOPP_ASSERT to tell us when there's a problem so we don't need to go hunting for bugs.
Diffstat (limited to 'sosemanuk.cpp')
-rw-r--r-- sosemanuk.cpp 24
1 files changed, 12 insertions, 12 deletions
diff --git a/sosemanuk.cpp b/sosemanuk.cpp
index 82cf8c92..ba049c3b 100644
--- a/sosemanuk.cpp
+++ b/sosemanuk.cpp
@@ -412,10 +412,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu
AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
AS2( mov SSE2_wordsLeft, WORD_REG(si))
- AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register
- AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
- AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
- AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
+ AS2( movdqu xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register
+ AS2( movdqu [SSE2_stateCopy+0*16], xmm0)
+ AS2( movdqu xmm0, [WORD_REG(ax)+1*16])
+ AS2( movdqu [SSE2_stateCopy+1*16], xmm0)
AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
AS2( psrlq xmm0, 32)
@@ -507,10 +507,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu
AS2( mov WORD_REG(si), SSE2_wordsLeft2)
ASL(1) // second inner loop, 16 words each, 5 iterations
- AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
- AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
- AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
- AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
+ AS2( movdqu xmm0, [WORD_REG(di)+0*20*4])
+ AS2( movdqu xmm2, [WORD_REG(di)+2*20*4])
+ AS2( movdqu xmm3, [WORD_REG(di)+3*20*4])
+ AS2( movdqu xmm1, [WORD_REG(di)+1*20*4])
// S2
AS2( movdqa xmm4, xmm0)
AS2( pand xmm0, xmm2)
@@ -596,10 +596,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu
ASL(6) // save state
AS2( mov AS_REG_6, SSE2_state)
- AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
- AS2( movdqa [AS_REG_6+0*16], xmm0)
- AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
- AS2( movdqa [AS_REG_6+1*16], xmm0)
+ AS2( movdqu xmm0, [SSE2_stateCopy+0*16])
+ AS2( movdqu [AS_REG_6+0*16], xmm0)
+ AS2( movdqu xmm0, [SSE2_stateCopy+1*16])
+ AS2( movdqu [AS_REG_6+1*16], xmm0)
AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
AS2( mov [AS_REG_6+10*4], ecx)