From 1bbbfb6b7538e88f979c68aa70d6427ce2e98cc0 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 24 Aug 2018 08:13:23 -0400 Subject: Fix partial specializations for FixedSizeAllocatorWithCleanup (PR #710) Commit afbd3e60f68f effectively treated a symptom and not the underlying problem. The problem was that linkers on 32-bit systems ignore CRYPTOPP_ALIGN_DATA(16) passed down by the compiler and align to 8 bytes or less. We have to use Wei's original code in some places. It is not a bad thing, but the bit fiddling is something we would like to contain a little more by depending more on language or platform features. This commit keeps the original changes which improve partial specializations; but fixes 32-bit linker behavior by effectively reverting afbd3e60f68f and e054d36dc88d00. We also add more comments so the next person understands why things are done the way they are. --- sosemanuk.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'sosemanuk.cpp') diff --git a/sosemanuk.cpp b/sosemanuk.cpp index ba049c3b..82cf8c92 100644 --- a/sosemanuk.cpp +++ b/sosemanuk.cpp @@ -412,10 +412,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)]) AS2( lea WORD_REG(si), [4*WORD_REG(cx)]) AS2( mov SSE2_wordsLeft, WORD_REG(si)) - AS2( movdqu xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register - AS2( movdqu [SSE2_stateCopy+0*16], xmm0) - AS2( movdqu xmm0, [WORD_REG(ax)+1*16]) - AS2( movdqu [SSE2_stateCopy+1*16], xmm0) + AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register + AS2( movdqa [SSE2_stateCopy+0*16], xmm0) + AS2( movdqa xmm0, [WORD_REG(ax)+1*16]) + AS2( movdqa [SSE2_stateCopy+1*16], xmm0) AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16]) AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0) AS2( psrlq xmm0, 32) @@ -507,10 +507,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, 
byte *outpu AS2( mov WORD_REG(si), SSE2_wordsLeft2) ASL(1) // second inner loop, 16 words each, 5 iterations - AS2( movdqu xmm0, [WORD_REG(di)+0*20*4]) - AS2( movdqu xmm2, [WORD_REG(di)+2*20*4]) - AS2( movdqu xmm3, [WORD_REG(di)+3*20*4]) - AS2( movdqu xmm1, [WORD_REG(di)+1*20*4]) + AS2( movdqa xmm0, [WORD_REG(di)+0*20*4]) + AS2( movdqa xmm2, [WORD_REG(di)+2*20*4]) + AS2( movdqa xmm3, [WORD_REG(di)+3*20*4]) + AS2( movdqa xmm1, [WORD_REG(di)+1*20*4]) // S2 AS2( movdqa xmm4, xmm0) AS2( pand xmm0, xmm2) @@ -596,10 +596,10 @@ void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *outpu ASL(6) // save state AS2( mov AS_REG_6, SSE2_state) - AS2( movdqu xmm0, [SSE2_stateCopy+0*16]) - AS2( movdqu [AS_REG_6+0*16], xmm0) - AS2( movdqu xmm0, [SSE2_stateCopy+1*16]) - AS2( movdqu [AS_REG_6+1*16], xmm0) + AS2( movdqa xmm0, [SSE2_stateCopy+0*16]) + AS2( movdqa [AS_REG_6+0*16], xmm0) + AS2( movdqa xmm0, [SSE2_stateCopy+1*16]) + AS2( movdqa [AS_REG_6+1*16], xmm0) AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16]) AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0) AS2( mov [AS_REG_6+10*4], ecx) -- cgit v1.2.1