summaryrefslogtreecommitdiff
path: root/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c
blob: a7f2995a6cd1597456ee0f26aecd5ecb563874b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/* { dg-do assemble } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */

/* Define mscatter_store1<OBJTYPE><STRIDETYPE><STRIDE>, a masked strided
   store with a stride fixed at macro-expansion time.

   For each I in [0, COUNT) the generated function reads MASKS[I * STRIDE]
   and, if it is nonzero, copies SRC[I] to DST[I * STRIDE].  All accesses
   are written as array indexing on the loop counter.

   OBJTYPE is the element type of DST and SRC, MASKTYPE the element type
   of MASKS and STRIDETYPE the type of both COUNT and the loop counter.
   STRIDE is pasted into the function name, so it must be a token that can
   form part of an identifier (the uses in this file pass integer
   literals).  */
#define MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
void mscatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
						   OBJTYPE * restrict src,\
						   MASKTYPE * restrict masks,\
						   STRIDETYPE count)\
{\
  for (STRIDETYPE i=0; i<count; i++)\
    if (masks[i * STRIDE])\
      dst[i * STRIDE] = src[i];\
}

/* Define mscatter_store2<OBJTYPE><STRIDETYPE>, a masked strided store
   whose stride is a run-time argument rather than a constant.

   For each I in [0, COUNT) the generated function reads MASKS[I * STRIDE]
   and, if it is nonzero, copies SRC[I] to DST[I * STRIDE].  STRIDE and
   COUNT both have type STRIDETYPE, as does the loop counter.  */
#define MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE)\
void mscatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
					   OBJTYPE * restrict src,\
					   MASKTYPE * restrict masks,\
					   STRIDETYPE stride,\
					   STRIDETYPE count)\
{\
  for (STRIDETYPE i=0; i<count; i++)\
    if (masks[i * stride])\
      dst[i * stride] = src[i];\
}

/* Define mscatter_store3s5<OBJTYPE><STRIDETYPE>, which interleaves five
   source arrays into DST under a per-iteration mask.

   The stride is the local constant 5.  For each I in [0, COUNT) the
   generated function tests MASKS[I * 5]; when it is nonzero, S1[I] .. S5[I]
   are stored to the five consecutive elements DST[5 * I + 0] ..
   DST[5 * I + 4].  When the mask is zero none of the five stores in that
   group is performed.  */
#define MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\
void mscatter_store3s5##OBJTYPE##STRIDETYPE\
  (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\
   OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\
   MASKTYPE * restrict masks, STRIDETYPE count)\
{\
  const STRIDETYPE STRIDE = 5;\
  for (STRIDETYPE i=0; i<count; i++)\
    if (masks[i * STRIDE])\
      {\
	dst[0 + (i * STRIDE)] = s1[i];\
	dst[1 + (i * STRIDE)] = s2[i];\
	dst[2 + (i * STRIDE)] = s3[i];\
	dst[3 + (i * STRIDE)] = s4[i];\
	dst[4 + (i * STRIDE)] = s5[i];\
      }\
}

/* Define mscatter_store4<OBJTYPE><STRIDETYPE><STRIDE>, a masked strided
   store with a constant stride, written with pointer increments.

   On each of the COUNT iterations the generated function tests
   MASKS[I * STRIDE] and, if it is nonzero, copies *SRC to *DST.  DST is
   then advanced by STRIDE elements and SRC by one element, whether or not
   the store happened.  Only the mask is indexed with the loop counter;
   the data accesses go through the moving pointers.

   STRIDE is pasted into the function name, so it must be a token that can
   form part of an identifier.  */
#define MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\
void mscatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\
						   OBJTYPE * restrict src,\
						   MASKTYPE * restrict masks,\
						   STRIDETYPE count)\
{\
  for (STRIDETYPE i=0; i<count; i++)\
    {\
      if (masks[i * STRIDE])\
	*dst = *src;\
      dst += STRIDE;\
      src += 1;\
    }\
}

/* Define mscatter_store5<OBJTYPE><STRIDETYPE>, a masked strided store
   with a run-time stride, written with pointer increments.

   On each of the COUNT iterations the generated function tests
   MASKS[I * STRIDE] and, if it is nonzero, copies *SRC to *DST.  DST is
   then advanced by STRIDE elements and SRC by one element, whether or not
   the store happened.  STRIDE and COUNT both have type STRIDETYPE.  */
#define MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE)\
void mscatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\
					   OBJTYPE * restrict src,\
					   MASKTYPE * restrict masks,\
					   STRIDETYPE stride,\
					   STRIDETYPE count)\
{\
  for (STRIDETYPE i=0; i<count; i++)\
    {\
      if (masks[i * stride])\
	*dst = *src;\
      dst += stride;\
      src += 1;\
    }\
}

/* Instantiations in which STRIDETYPE is the same type as MASKTYPE.
   The number and kind of functions defined here must stay in step with
   the instruction counts expected by the scan directives at the end of
   the file.  */

/* Indexed form, constant strides.  */
MASK_SCATTER_STORE1 (double, long, long, 5)
MASK_SCATTER_STORE1 (double, long, long, 8)
MASK_SCATTER_STORE1 (double, long, long, 21)
MASK_SCATTER_STORE1 (double, long, long, 1009)

MASK_SCATTER_STORE1 (float, int, int, 5)

MASK_SCATTER_STORE1 (float, int, int, 8)
MASK_SCATTER_STORE1 (float, int, int, 21)
MASK_SCATTER_STORE1 (float, int, int, 1009)

/* Indexed form, run-time stride.  */
MASK_SCATTER_STORE2 (double, long, long)
MASK_SCATTER_STORE2 (float, int, int)

/* Five interleaved sources, fixed stride of 5.  */
MASK_SCATTER_STORE3 (double, long, long)
MASK_SCATTER_STORE3 (float, int, int)

/* Pointer-increment form, constant stride.  */
MASK_SCATTER_STORE4 (double, long, long, 5)
/* NOTE: We can't vectorize MASK_SCATTER_STORE4 (float, int, int, 3) because we
   can't prove that the offsets used for the gather load won't overflow.  */

/* Pointer-increment form, run-time stride.  */
MASK_SCATTER_STORE5 (double, long, long)
MASK_SCATTER_STORE5 (float, int, int)

/* Widened forms.  */
/* Here STRIDETYPE (the type of the loop counter and COUNT) is int or
   short, which differs from the MASKTYPE of the same instantiation.  */
MASK_SCATTER_STORE1 (double, long, int, 5)
MASK_SCATTER_STORE1 (double, long, int, 8)
MASK_SCATTER_STORE1 (double, long, short, 5)
MASK_SCATTER_STORE1 (double, long, short, 8)

MASK_SCATTER_STORE1 (float, int, short, 5)
MASK_SCATTER_STORE1 (float, int, short, 8)

MASK_SCATTER_STORE2 (double, long, int)
MASK_SCATTER_STORE2 (float, int, short)

MASK_SCATTER_STORE4 (double, long, int, 5)
MASK_SCATTER_STORE4 (float, int, short, 5)

MASK_SCATTER_STORE5 (double, long, int)

/* Gather loads are for the masks.  */
/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 15 } } */
/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 8 } } */
/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */

/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */
/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */
/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */