1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
/* Define scatter_store1<OBJTYPE><STRIDETYPE><STRIDE>, which copies the
   COUNT elements of SRC to every STRIDE'th element of DST:
   DST[i * STRIDE] = SRC[i] for 0 <= i < COUNT.

   OBJTYPE is the element type, STRIDETYPE is the type of both the loop
   counter and COUNT, and STRIDE is a compile-time constant supplied as
   a macro argument (it is also pasted into the function name).  DST and
   SRC are restrict-qualified.  */
#define SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE)\
void scatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
OBJTYPE * restrict src,\
STRIDETYPE count)\
{\
for (STRIDETYPE i=0; i<count; i++)\
dst[i * STRIDE] = src[i];\
}
/* Define scatter_store2<OBJTYPE><STRIDETYPE>, which is the same indexed
   copy as SCATTER_STORE1 except that the stride is a run-time function
   parameter of type STRIDETYPE rather than a compile-time constant:
   DST[i * stride] = SRC[i] for 0 <= i < COUNT.  */
#define SCATTER_STORE2(OBJTYPE,STRIDETYPE)\
void scatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
OBJTYPE * restrict src,\
STRIDETYPE stride,\
STRIDETYPE count)\
{\
for (STRIDETYPE i=0; i<count; i++)\
dst[i * stride] = src[i];\
}
/* Define scatter_store3s5<OBJTYPE><STRIDETYPE>, which interleaves five
   source arrays S1...S5 into DST with a fixed stride of 5, so that
   DST[k + 5 * i] = S<k+1>[i] for 0 <= k < 5 and 0 <= i < COUNT.
   Each iteration therefore writes five consecutive elements of DST.

   The five stores are listed in the order 0, 4, 1, 2, 3 rather than in
   ascending offset order.  NOTE(review): presumably this is intentional,
   so that the expected code does not depend on the stores appearing in
   memory order; confirm before reordering them.  */
#define SCATTER_STORE3(OBJTYPE,STRIDETYPE)\
void scatter_store3s5##OBJTYPE##STRIDETYPE\
(OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\
OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\
STRIDETYPE count)\
{\
const STRIDETYPE STRIDE = 5;\
for (STRIDETYPE i=0; i<count; i++)\
{\
dst[0 + (i * STRIDE)] = s1[i];\
dst[4 + (i * STRIDE)] = s5[i];\
dst[1 + (i * STRIDE)] = s2[i];\
dst[2 + (i * STRIDE)] = s3[i];\
dst[3 + (i * STRIDE)] = s4[i];\
}\
}
/* Define scatter_store4<OBJTYPE><STRIDETYPE><STRIDE>, a pointer-increment
   variant of SCATTER_STORE1.  Instead of indexing with the loop counter,
   each iteration stores *SRC to *DST and then advances DST by the
   compile-time constant STRIDE and SRC by one element.  The counter I,
   of type STRIDETYPE, only controls the number of iterations.  */
#define SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE)\
void scatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
OBJTYPE * restrict src,\
STRIDETYPE count)\
{\
for (STRIDETYPE i=0; i<count; i++)\
{\
*dst = *src;\
dst += STRIDE;\
src += 1;\
}\
}
/* Define scatter_store5<OBJTYPE><STRIDETYPE>, a pointer-increment
   variant of SCATTER_STORE2.  Each iteration stores *SRC to *DST and
   then advances DST by the run-time parameter STRIDE (of type
   STRIDETYPE) and SRC by one element.  The counter I only controls the
   number of iterations.  */
#define SCATTER_STORE5(OBJTYPE,STRIDETYPE)\
void scatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
OBJTYPE * restrict src,\
STRIDETYPE stride,\
STRIDETYPE count)\
{\
for (STRIDETYPE i=0; i<count; i++)\
{\
*dst = *src;\
dst += stride;\
src += 1;\
}\
}
/* Forms in which the index type is as wide as the element type on the
   AArch64 target selected above: long indices for double elements and
   int indices for float elements.  The constant-stride forms are
   instantiated with strides 5, 8, 21 and 1009.  */
SCATTER_STORE1 (double, long, 5)
SCATTER_STORE1 (double, long, 8)
SCATTER_STORE1 (double, long, 21)
SCATTER_STORE1 (double, long, 1009)
SCATTER_STORE1 (float, int, 5)
SCATTER_STORE1 (float, int, 8)
SCATTER_STORE1 (float, int, 21)
SCATTER_STORE1 (float, int, 1009)
SCATTER_STORE2 (double, long)
SCATTER_STORE2 (float, int)
SCATTER_STORE3 (double, long)
SCATTER_STORE3 (float, int)
SCATTER_STORE4 (double, long, 5)
/* NOTE: We can't vectorize SCATTER_STORE4 (float, int, 5) because we can't
prove that the offsets used for the scatter store won't overflow. */
SCATTER_STORE5 (double, long)
SCATTER_STORE5 (float, int)
/* Widened forms: the index type is narrower than the element type on the
   AArch64 target selected above (int or short indices for double
   elements, short indices for float elements).  */
SCATTER_STORE1 (double, int, 5)
SCATTER_STORE1 (double, int, 8)
SCATTER_STORE1 (double, short, 5)
SCATTER_STORE1 (double, short, 8)
SCATTER_STORE1 (float, short, 5)
SCATTER_STORE1 (float, short, 8)
SCATTER_STORE2 (double, int)
SCATTER_STORE2 (float, short)
SCATTER_STORE4 (double, int, 5)
SCATTER_STORE4 (float, short, 5)
SCATTER_STORE5 (double, int)
/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */
|