; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-NO-SVE-REMARKS

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; These tests validate the behaviour of scalable vectorization factor hints,
; where the following applies:
;
; * If the backend does not support scalable vectors, ignore the hint and let
;   the vectorizer pick a VF.
; * If there are no dependencies and assuming the VF is a power of 2 the VF
;   should be accepted. This applies to both fixed and scalable VFs.
; * If the dependency is too small to use scalable vectors, change the VF to
;   fixed, where existing behavior applies (clamping).
; * If scalable vectorization is feasible given the dependency and the VF is
;   valid, accept it. Otherwise, clamp to the max scalable VF.
;
; Every loop below runs 1024 iterations and stores a[i] + b[i] to a[i + K];
; the constant K is the dependence distance that bounds the legal VF.
;
; NOTE(review): this file was recovered from a copy in which line breaks and
; all angle-bracketed tokens had been stripped. The C loop bodies, the vector
; types, the remark locations and the debug-output expectations that sit
; between each C snippet and its function were reconstructed. Confirm them
; against real opt output before relying on this test.

; test1
;
; Scalable vectorization unfeasible, clamp VF from (4, scalable) -> (4, fixed).
;
; The pragma applied to this loop implies a scalable vector <vscale x 4 x i32>
; be used for vectorization. For fixed vectors the MaxVF=8, otherwise there
; would be a dependence between vector lanes for vectors greater than 256 bits.
;
; void test1(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 8] = a[i] + b[i];
;   }
; }
;
; CHECK-DBG-LABEL: LV: Checking a loop in 'test1'
; CHECK-DBG: LV: Scalable vectorization is available
; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible.
; CHECK-DBG: LV: The max safe fixed VF is: 8.
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test1
; CHECK: <4 x i32>
define void @test1(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 8
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0

exit:
  ret void
}

!0 = !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test2
;
; Scalable vectorization unfeasible, clamp VF from (8, scalable) -> (4, fixed).
;
; void test2(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(8, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 4] = a[i] + b[i];
;   }
; }
;
; CHECK-DBG-LABEL: LV: Checking a loop in 'test2'
; CHECK-DBG: LV: Scalable vectorization is available
; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
; CHECK-DBG: LV: The max safe fixed VF is: 4.
; CHECK-DBG: LV: User VF=vscale x 8 is unsafe. Ignoring scalable UserVF.
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test2
; CHECK: <4 x i32>
define void @test2(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 4
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !3

exit:
  ret void
}

!3 = !{!3, !4, !5}
!4 = !{!"llvm.loop.vectorize.width", i32 8}
!5 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test3
;
; Scalable vectorization feasible and the VF is valid.
;
; Specifies a vector of <vscale x 2 x i32>, i.e. maximum of 32 x i32 with 2
; words per 128-bits (unpacked).
;
; void test3(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(2, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 32] = a[i] + b[i];
;   }
; }
;
; CHECK-DBG-LABEL: LV: Checking a loop in 'test3'
; CHECK-DBG: LV: Scalable vectorization is available
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
; CHECK-DBG: LV: Using user VF vscale x 2.
; CHECK-LABEL: @test3
; CHECK: <vscale x 2 x i32>
define void @test3(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 32
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6

exit:
  ret void
}

!6 = !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 2}
!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test4
;
; Scalable vectorization feasible, but the given VF is unsafe. Should ignore
; the hint and leave it to the vectorizer to pick a more suitable VF.
;
; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4
; words per 128-bits (packed).
;
; void test4(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 32] = a[i] + b[i];
;   }
; }
;
; CHECK-DBG-LABEL: LV: Checking a loop in 'test4'
; CHECK-DBG: LV: Scalable vectorization is available
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe. Ignoring the hint to let the compiler pick a more suitable value.
; CHECK-DBG: Found feasible scalable VF = vscale x 2
; CHECK-DBG: LV: Selecting VF: vscale x 2.
; CHECK-LABEL: @test4
; CHECK: <vscale x 2 x i32>
define void @test4(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 32
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !9

exit:
  ret void
}

!9 = !{!9, !10, !11}
!10 = !{!"llvm.loop.vectorize.width", i32 4}
!11 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test5
;
; Scalable vectorization feasible and the VF is valid.
;
; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4
; words per 128-bits (packed).
;
; void test5(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 128] = a[i] + b[i];
;   }
; }
;
; CHECK-DBG-LABEL: LV: Checking a loop in 'test5'
; CHECK-DBG: LV: Scalable vectorization is available
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
; CHECK-DBG: LV: Using user VF vscale x 4
; CHECK-LABEL: @test5
; CHECK: <vscale x 4 x i32>
define void @test5(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 128
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !12

exit:
  ret void
}

!12 = !{!12, !13, !14}
!13 = !{!"llvm.loop.vectorize.width", i32 4}
!14 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test6
;
; Scalable vectorization feasible, but the VF is unsafe. Should ignore
; the hint and leave it to the vectorizer to pick a more suitable VF.
;
; Specifies a vector of <vscale x 16 x i32>, i.e. maximum of 256 x i32.
;
; void test6(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(16, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 128] = a[i] + b[i];
;   }
; }
;
; CHECK-DBG-LABEL: LV: Checking a loop in 'test6'
; CHECK-DBG: LV: Scalable vectorization is available
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
; CHECK-DBG: LV: User VF=vscale x 16 is unsafe. Ignoring scalable UserVF.
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe. Ignoring the hint to let the compiler pick a more suitable value.
; CHECK-DBG: LV: Found feasible scalable VF = vscale x 4
; CHECK-DBG: Selecting VF: vscale x 4.
; CHECK-LABEL: @test6
; CHECK: <vscale x 4 x i32>
define void @test6(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 128
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !15

exit:
  ret void
}

!15 = !{!15, !16, !17}
!16 = !{!"llvm.loop.vectorize.width", i32 16}
!17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test_no_sve
;
; Only checked by the run without -mattr=+sve: the scalable hint must be
; dropped with a remark and a fixed-width VF chosen instead. The loop stores
; back to a[i], so there is no loop-carried dependence limiting the VF.
;
; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in 'test_no_sve'
; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is ignored because scalable vectors are not available.
; CHECK-NO-SVE-REMARKS: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is ignored because the target does not support scalable vectors. The compiler will pick a more suitable value.
; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
; CHECK-NO-SVE-LABEL: @test_no_sve
; CHECK-NO-SVE: <4 x i32>
; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
define void @test_no_sve(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  store i32 %add, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !18

exit:
  ret void
}

!18 = !{!18, !19, !20}
!19 = !{!"llvm.loop.vectorize.width", i32 4}
!20 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; Test the LV falls back to fixed-width vectorization if scalable vectors are
; supported but max vscale is undefined.
;
; CHECK-DBG-LABEL: LV: Checking a loop in 'test_no_max_vscale'
; CHECK-DBG: LV: Scalable vectorization is available
; CHECK-DBG: The max safe fixed VF is: 4.
; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test_no_max_vscale
; CHECK: <4 x i32>
define void @test_no_max_vscale(ptr %a, ptr %b) #0 {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %2 = add nuw nsw i64 %iv, 4
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21

exit:
  ret void
}

attributes #0 = { vscale_range(1, 16) }

!21 = !{!21, !22, !23}
!22 = !{!"llvm.loop.vectorize.width", i32 4}
!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}