; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Code generation checks for extractelement with a run-time index (%sel).
; Each function extracts one element from a vector and stores or returns it;
; the check lines in front of the function pin the instruction pattern that
; llc is expected to emit for it (scalar compare/select chains, a 64-bit
; shift of a packed SGPR pair, or an m0-indexed v_movrels read).
;
; NOTE(review): every extractelement below is missing its vector operand
; (the text between the vector type and ", i32 %sel"). It looks like the
; constant initializer was stripped by a tool that removed angle-bracketed
; text. The IR does not parse until those operands are restored - confirm
; the values against the upstream copy of this test.

; 4 x float: expect a compare/select chain on the scalar index, no buffer_ access.
; GCN-LABEL: {{^}}float4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2.0, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4.0, [[V2]], [[C3]]
; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @float4_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <4 x float> , i32 %sel
  store float %ext, ptr addrspace(1) %out
  ret void
}

; 4 x i32: same shape as the float case, with the later selects reading vcc.
; GCN-LABEL: {{^}}int4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_lg_u32 [[IDX:s[0-9]+]], 2
; GCN-DAG: s_cmp_eq_u32 [[IDX]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], 2, [[V1]], vcc
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], 4, [[V2]], vcc
; GCN: store_dword v[{{[0-9:]+}}], [[V3]]
define amdgpu_kernel void @int4_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <4 x i32> , i32 %sel
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; 4 x double: expect pairs of 32-bit scalar selects (one per dword half).
; GCN-LABEL: {{^}}double4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}}
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 3
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}}
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double4_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <4 x double> , i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; 5 x double: as above, with one more compare/select step for index 4.
; GCN-LABEL: {{^}}double5_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 2
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0xe147ae14, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x4000147a, s{{[0-9]+}}
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 3
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40100a3d, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x70a3d70a, s{{[0-9]+}}
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 4
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, 0x40140a3d, s{{[0-9]+}}
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double5_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <5 x double> , i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; 4 x half: expect the vector held in an SGPR pair and the element picked by
; a 64-bit right shift; the shift amount is the index shifted left by 4.
; GCN-LABEL: {{^}}half4_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x40003c00
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x44004200
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 4
; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_short v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @half4_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <4 x half> , i32 %sel
  store half %ext, ptr addrspace(1) %out
  ret void
}

; 2 x float: a single compare/select is expected.
; GCN-LABEL: {{^}}float2_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1.0, [[C1]]
; GCN: store_dword v[{{[0-9:]+}}], [[V1]]
define amdgpu_kernel void @float2_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <2 x float> , i32 %sel
  store float %ext, ptr addrspace(1) %out
  ret void
}

; 2 x double: one compare and one scalar select per dword half.
; GCN-LABEL: {{^}}double2_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 1
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3f847ae1
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x47ae147b
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @double2_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <2 x double> , i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; 8 x half: a seven-step compare/select chain, each step consuming the
; previous step's result.
; GCN-LABEL: {{^}}half8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @half8_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <8 x half> , i32 %sel
  store half %ext, ptr addrspace(1) %out
  ret void
}

; 8 x i16: same seven-step chain as the 8 x half case.
; GCN-LABEL: {{^}}short8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN: store_short v[{{[0-9:]+}}], [[V7]]
define amdgpu_kernel void @short8_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <8 x i16> , i32 %sel
  store i16 %ext, ptr addrspace(1) %out
  ret void
}

; 8 x float: expect the elements materialized in VGPRs and read back with
; v_movrels after the index is copied into m0.
; GCN-LABEL: {{^}}float8_extelt:
; GCN-DAG: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-DAG: s_load_dword [[S0:s[0-9]+]], s[0:1], 0x2c
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: s_waitcnt lgkmcnt(0)
; GCN-DAG: s_mov_b32 m0, [[S0]]
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], v{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float8_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <8 x float> , i32 %sel
  store float %ext, ptr addrspace(1) %out
  ret void
}

; 8 x double: expect two v_movrels reads (low and high dword) from
; consecutive base registers, with no s_or_b32 in the index computation.
; GCN-LABEL: {{^}}double8_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <8 x double> , i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; 7 x double: same expectations as the 8 x double case.
; GCN-LABEL: {{^}}double7_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <7 x double> , i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; 16 x float: sixteen VGPR constants, then one v_movrels read whose base is
; the register holding the first constant.
; GCN-LABEL: {{^}}float16_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 m0,
; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float16_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <16 x float> , i32 %sel
  store float %ext, ptr addrspace(1) %out
  ret void
}

; 15 x double: same expectations as the 8 x double case.
; GCN-LABEL: {{^}}double15_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <15 x double> , i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; 16 x double: same expectations as the 8 x double case.
; GCN-LABEL: {{^}}double16_extelt:
; GCN-NOT: buffer_
; GCN-NOT: s_or_b32
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]]
; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]]
; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}], v[[[RES_LO]]:[[RES_HI]]]
define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <16 x double> , i32 %sel
  store double %ext, ptr addrspace(1) %out
  ret void
}

; 32 x float: thirty-two VGPR constants, then one v_movrels read.
; GCN-LABEL: {{^}}float32_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 m0,
; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41880000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41980000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f00000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f80000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x42000000
; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @float32_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <32 x float> , i32 %sel
  store float %ext, ptr addrspace(1) %out
  ret void
}

; 8 x i8: expect the vector held in an SGPR pair and the element picked by
; a 64-bit right shift; the shift amount is the index shifted left by 3.
; GCN-LABEL: {{^}}byte8_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201
; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0x8070605
; GCN-DAG: s_lshl_b32 [[SEL:s[0-9]+]], s{{[0-9]+}}, 3
; GCN: s_lshr_b64 s[[[RL:[0-9]+]]:{{[0-9]+}}], s[[[SL]]:[[SH]]], [[SEL]]
; GCN-DAG: v_mov_b32_e32 v[[VRL:[0-9]+]], s[[RL]]
; GCN: store_byte v[{{[0-9:]+}}], v[[VRL]]
define amdgpu_kernel void @byte8_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <8 x i8> , i32 %sel
  store i8 %ext, ptr addrspace(1) %out
  ret void
}

; 16 x i8: a fifteen-step compare/select chain; step N selects on the
; condition captured for index N and consumes the result of step N-1.
; GCN-LABEL: {{^}}byte16_extelt:
; GCN-NOT: buffer_
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 2
; GCN-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 3
; GCN-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 4
; GCN-DAG: s_cselect_b64 [[C4:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 5
; GCN-DAG: s_cselect_b64 [[C5:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 6
; GCN-DAG: s_cselect_b64 [[C6:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 7
; GCN-DAG: s_cselect_b64 [[C7:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 8
; GCN-DAG: s_cselect_b64 [[C8:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 9
; GCN-DAG: s_cselect_b64 [[C9:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 10
; GCN-DAG: s_cselect_b64 [[C10:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 11
; GCN-DAG: s_cselect_b64 [[C11:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 12
; GCN-DAG: s_cselect_b64 [[C12:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 13
; GCN-DAG: s_cselect_b64 [[C13:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 14
; GCN-DAG: s_cselect_b64 [[C14:[^,]+]], -1, 0
; GCN-DAG: s_cmp_lg_u32 [[IDX]], 15
; GCN-DAG: s_cselect_b64 [[C15:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], {{[^,]+}}, {{[^,]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V2:v[0-9]+]], {{[^,]+}}, [[V1]], [[C2]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V3:v[0-9]+]], {{[^,]+}}, [[V2]], [[C3]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V4:v[0-9]+]], {{[^,]+}}, [[V3]], [[C4]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V5:v[0-9]+]], {{[^,]+}}, [[V4]], [[C5]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V6:v[0-9]+]], {{[^,]+}}, [[V5]], [[C6]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V7:v[0-9]+]], {{[^,]+}}, [[V6]], [[C7]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V8:v[0-9]+]], {{[^,]+}}, [[V7]], [[C8]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V9:v[0-9]+]], {{[^,]+}}, [[V8]], [[C9]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V10:v[0-9]+]], {{[^,]+}}, [[V9]], [[C10]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V11:v[0-9]+]], {{[^,]+}}, [[V10]], [[C11]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V12:v[0-9]+]], {{[^,]+}}, [[V11]], [[C12]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V13:v[0-9]+]], {{[^,]+}}, [[V12]], [[C13]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V14:v[0-9]+]], {{[^,]+}}, [[V13]], [[C14]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V15:v[0-9]+]], {{[^,]+}}, [[V14]], [[C15]]
; GCN: store_byte v[{{[0-9:]+}}], [[V15]]
define amdgpu_kernel void @byte16_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <16 x i8> , i32 %sel
  store i8 %ext, ptr addrspace(1) %out
  ret void
}

; 4 x i1: the only case here where buffer_ accesses are expected - four
; byte stores, one byte load, then the loaded value is masked with 1.
; NOTE(review): presumably the vector is spilled to a stack temporary and
; indexed in memory; the checks only show the buffer_ store/load pattern.
; GCN-LABEL: {{^}}bit4_extelt:
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN-DAG: buffer_store_byte [[ZERO]],
; GCN-DAG: buffer_store_byte [[ONE]],
; GCN-DAG: buffer_store_byte [[ZERO]],
; GCN-DAG: buffer_store_byte [[ONE]],
; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]]
; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit4_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <4 x i1> , i32 %sel
  %zext = zext i1 %ext to i32
  store i32 %zext, ptr addrspace(1) %out
  ret void
}

; 128 x i1: only the first select and the final step (compare against
; 0x7f, i.e. index 127) of the chain are checked, then the mask with 1.
; GCN-LABEL: {{^}}bit128_extelt:
; GCN-NOT: buffer_
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
; GCN: s_cmpk_lg_i32 {{s[0-9]+}}, 0x7f
; GCN: s_cselect_b64 [[CL:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit128_extelt(ptr addrspace(1) %out, i32 %sel) {
entry:
  %ext = extractelement <128 x i1> , i32 %sel
  %zext = zext i1 %ext to i32
  store i32 %zext, ptr addrspace(1) %out
  ret void
}

; 32 x float with the index in a VGPR (non-kernel function, index arrives
; in v0): expect vector compares feeding v_cndmask; only the first and the
; last step of the chain are checked.
; GCN-LABEL: {{^}}float32_extelt_vec:
; GCN-NOT: buffer_
; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1.0, 2.0, [[CC1]]
; GCN-DAG: v_mov_b32_e32 [[LASTVAL:v[0-9]+]], 0x42000000
; GCN-DAG: v_cmp_ne_u32_e32 [[LASTCC:[^,]+]], 31, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v0, [[LASTVAL]], v{{[0-9]+}}, [[LASTCC]]
define float @float32_extelt_vec(i32 %sel) {
entry:
  %ext = extractelement <32 x float> , i32 %sel
  ret float %ext
}

; 16 x double with the index in a VGPR: only the first select step is
; checked, once for the high dword and once for the low dword.
; GCN-LABEL: {{^}}double16_extelt_vec:
; GCN-NOT: buffer_
; GCN-DAG: v_mov_b32_e32 [[V1HI:v[0-9]+]], 0x3ff19999
; GCN-DAG: v_mov_b32_e32 [[V1LO:v[0-9]+]], 0x9999999a
; GCN-DAG: v_mov_b32_e32 [[V2HI:v[0-9]+]], 0x4000cccc
; GCN-DAG: v_mov_b32_e32 [[V2LO:v[0-9]+]], 0xcccccccd
; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC1:[^,]+]], 1, v0
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1HI:v[0-9]+]], [[V1HI]], [[V2HI]], [[CC1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} [[R1LO:v[0-9]+]], [[V1LO]], [[V2LO]], [[CC1]]
define double @double16_extelt_vec(i32 %sel) {
entry:
  %ext = extractelement <16 x double> , i32 %sel
  ret double %ext
}