// Source: root/sse_simd.h
// Blob: fe3a033251b72a05a9a41f48535868f7f7504bec
// sse_simd.h - written and placed in public domain by Jeffrey Walton
//              Helper functions to work with SSE and above. This file
//              was added after a scan by lgtm.com. The scan reported some
//              findings that were not problems, but we refactored to squash them.

#ifndef CRYPTOPP_SSE_CRYPTO_H
#define CRYPTOPP_SSE_CRYPTO_H

#include "config.h"

#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
# include <emmintrin.h>
#endif

#if (CRYPTOPP_AVX2_AVAILABLE)
# include <immintrin.h>
#endif

NAMESPACE_BEGIN(CryptoPP)

#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)

template <unsigned int N>
inline __m128i load_m128i(const byte* ptr)
{
    enum { SCALE=sizeof(__m128i)/sizeof(byte) };
    return _mm_loadu_si128(
        const_cast<__m128i*>(  // SunCC workaround
        reinterpret_cast<const __m128i*>(ptr+SCALE*N)));
}

template <unsigned int N>
inline __m128i load_m128i(const word16* ptr)
{
    enum { SCALE=sizeof(__m128i)/sizeof(word16) };
    return _mm_loadu_si128(
        const_cast<__m128i*>(  // SunCC workaround
        reinterpret_cast<const __m128i*>(ptr+SCALE*N)));
}

template <unsigned int N>
inline __m128i load_m128i(const word32* ptr)
{
    enum { SCALE=sizeof(__m128i)/sizeof(word32) };
    return _mm_loadu_si128(
        const_cast<__m128i*>(  // SunCC workaround
        reinterpret_cast<const __m128i*>(ptr+SCALE*N)));
}

template <unsigned int N>
inline __m128i load_m128i(const word64* ptr)
{
    enum { SCALE=sizeof(__m128i)/sizeof(word64) };
    return _mm_loadu_si128(
        const_cast<__m128i*>(  // SunCC workaround
        reinterpret_cast<const __m128i*>(ptr+SCALE*N)));
}

// Store a __m128i into an array of T using the unaligned store intrinsic.
// N specifies the nth 128-bit element: the destination address is ptr
// advanced by N*(sizeof(__m128i)/sizeof(T)) elements of type T.
template <unsigned int N, class T>
inline void store_m128i(T* ptr, __m128i val)
{
    // Count of T elements spanned by a single __m128i
    enum { ELEMS_PER_VEC = sizeof(__m128i)/sizeof(T) };

    T* const dest = ptr + ELEMS_PER_VEC*N;
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dest), val);
}

#endif

#if (CRYPTOPP_AVX2_AVAILABLE)

template <unsigned int N>
inline __m256i load_m256i(const byte* ptr)
{
    enum { SCALE=sizeof(__m256i)/sizeof(byte) };
    return _mm256_loadu_si256(
        const_cast<__m256i*>(  // SunCC workaround
        reinterpret_cast<const __m256i*>(ptr+SCALE*N)));
}

template <unsigned int N>
inline __m256i load_m256i(const word16* ptr)
{
    enum { SCALE=sizeof(__m256i)/sizeof(word16) };
    return _mm256_loadu_si256(
        const_cast<__m256i*>(  // SunCC workaround
        reinterpret_cast<const __m256i*>(ptr+SCALE*N)));
}

template <unsigned int N>
inline __m256i load_m256i(const word32* ptr)
{
    enum { SCALE=sizeof(__m256i)/sizeof(word32) };
    return _mm256_loadu_si256(
        const_cast<__m256i*>(  // SunCC workaround
        reinterpret_cast<const __m256i*>(ptr+SCALE*N)));
}

template <unsigned int N>
inline __m256i load_m256i(const word64* ptr)
{
    enum { SCALE=sizeof(__m256i)/sizeof(word64) };
    return _mm256_loadu_si256(
        const_cast<__m256i*>(  // SunCC workaround
        reinterpret_cast<const __m256i*>(ptr+SCALE*N)));
}

// Store a __m256i into an array of T using the unaligned store intrinsic.
// N specifies the nth 256-bit element: the destination address is ptr
// advanced by N*(sizeof(__m256i)/sizeof(T)) elements of type T.
template <unsigned int N, class T>
inline void store_m256i(T* ptr, __m256i val)
{
    // Count of T elements spanned by a single __m256i
    enum { ELEMS_PER_VEC = sizeof(__m256i)/sizeof(T) };

    T* const dest = ptr + ELEMS_PER_VEC*N;
    _mm256_storeu_si256(reinterpret_cast<__m256i*>(dest), val);
}

#endif

NAMESPACE_END

#endif  // CRYPTOPP_SSE_CRYPTO_H