diff options
author | Jeffrey Walton <noloader@gmail.com> | 2018-08-24 08:13:23 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-24 08:13:23 -0400 |
commit | 1bbbfb6b7538e88f979c68aa70d6427ce2e98cc0 (patch) | |
tree | d96a2f70a7a526e09e42458593089d4429675052 /sosemanuk.cpp | |
parent | 243673c32acee63a7b0b23aeebac9b2ccfd80e7d (diff) | |
download | cryptopp-git-1bbbfb6b7538e88f979c68aa70d6427ce2e98cc0.tar.gz |
Fix partial specializations for FixedSizeAllocatorWithCleanup (PR #710)
Commit afbd3e60f68f effectively treated a symptom and not the underlying problem. The problem was that linkers on 32-bit systems ignore the CRYPTOPP_ALIGN_DATA(16) passed down by the compiler and align to 8 bytes or less. We have to use Wei's original code in some places. It is not a bad thing, but the bit fiddling is something we would like to contain a little more by depending more on language or platform features.
This commit keeps the original changes which improve partial specializations, but fixes 32-bit linker behavior by effectively reverting afbd3e60f68f and e054d36dc88d00. We also add more comments so the next person understands why things are done the way they are.
Diffstat (limited to 'sosemanuk.cpp')
-rw-r--r-- | sosemanuk.cpp | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/sosemanuk.cpp b/sosemanuk.cpp index ba049c3b..82cf8c92 100644 --- a/sosemanuk.cpp +++ b/sosemanuk.cpp @@ -412,10 +412,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
AS2( mov SSE2_wordsLeft, WORD_REG(si))
- AS2( movdqu xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register
- AS2( movdqu [SSE2_stateCopy+0*16], xmm0)
- AS2( movdqu xmm0, [WORD_REG(ax)+1*16])
- AS2( movdqu [SSE2_stateCopy+1*16], xmm0)
+ AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register
+ AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
+ AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
+ AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
AS2( psrlq xmm0, 32)
@@ -507,10 +507,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu AS2( mov WORD_REG(si), SSE2_wordsLeft2)
ASL(1) // second inner loop, 16 words each, 5 iterations
- AS2( movdqu xmm0, [WORD_REG(di)+0*20*4])
- AS2( movdqu xmm2, [WORD_REG(di)+2*20*4])
- AS2( movdqu xmm3, [WORD_REG(di)+3*20*4])
- AS2( movdqu xmm1, [WORD_REG(di)+1*20*4])
+ AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
+ AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
+ AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
+ AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
// S2
AS2( movdqa xmm4, xmm0)
AS2( pand xmm0, xmm2)
@@ -596,10 +596,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu ASL(6) // save state
AS2( mov AS_REG_6, SSE2_state)
- AS2( movdqu xmm0, [SSE2_stateCopy+0*16])
- AS2( movdqu [AS_REG_6+0*16], xmm0)
- AS2( movdqu xmm0, [SSE2_stateCopy+1*16])
- AS2( movdqu [AS_REG_6+1*16], xmm0)
+ AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
+ AS2( movdqa [AS_REG_6+0*16], xmm0)
+ AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
+ AS2( movdqa [AS_REG_6+1*16], xmm0)
AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
AS2( mov [AS_REG_6+10*4], ecx)
|