diff options
Diffstat (limited to 'test')
24 files changed, 498 insertions, 25 deletions
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 74628f0c5c..eb5ad8f0c3 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -599,3 +599,27 @@ for.end179: ; preds = %for.cond.loopexit, declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone + +; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8), +; creating an illegal type during legalization and causing an assert. +; PR15970 +define void @no_illegal_types_vmull_sext(<4 x i32> %a) { +entry: + %wide.load283.i = load <4 x i8>* undef, align 1 + %0 = sext <4 x i8> %wide.load283.i to <4 x i32> + %1 = sub nsw <4 x i32> %0, %a + %2 = mul nsw <4 x i32> %1, %1 + %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2 + store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 + ret void +} +define void @no_illegal_types_vmull_zext(<4 x i32> %a) { +entry: + %wide.load283.i = load <4 x i8>* undef, align 1 + %0 = zext <4 x i8> %wide.load283.i to <4 x i32> + %1 = sub nsw <4 x i32> %0, %a + %2 = mul nsw <4 x i32> %1, %1 + %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2 + store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 + ret void +} diff --git a/test/CodeGen/Generic/annotate.ll b/test/CodeGen/Generic/annotate.ll new file mode 100644 index 0000000000..c617eb0925 --- /dev/null +++ b/test/CodeGen/Generic/annotate.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s + +; PR15253 + +@.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata" +@.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata" + + +define i32 @foo(i32 %a) { +entry: + %0 = call i32 @llvm.annotation.i32(i32 %a, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), 
i32 2) + ret i32 %0 +} + +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) #1 diff --git a/test/CodeGen/Generic/ptr-annotate.ll b/test/CodeGen/Generic/ptr-annotate.ll new file mode 100644 index 0000000000..ac5bd5533e --- /dev/null +++ b/test/CodeGen/Generic/ptr-annotate.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s + +; PR15253 + +%struct.mystruct = type { i32 } + +@.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata" +@.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata" + +define void @foo() { +entry: + %m = alloca i8, align 4 + %0 = call i8* @llvm.ptr.annotation.p0i8(i8* %m, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), i32 2) + store i8 1, i8* %0, align 4 + ret void +} + +declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32) #1 diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll new file mode 100644 index 0000000000..c9cb2f71f4 --- /dev/null +++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +; Ensure global variables in address space 0 are promoted to address space 1 + +; CHECK: .global .align 4 .u32 myglobal = 42; +@myglobal = internal global i32 42, align 4 +; CHECK: .global .align 4 .u32 myconst = 42; +@myconst = internal constant i32 42, align 4 + + +define void @foo(i32* %a, i32* %b) { +; CHECK: cvta.global.u32 + %ld1 = load i32* @myglobal +; CHECK: cvta.global.u32 + %ld2 = load i32* @myconst + store i32 %ld1, i32* %a + store i32 %ld2, i32* %b + ret void +} + + +!nvvm.annotations = !{!0} +!0 = metadata !{void (i32*, i32*)* @foo, metadata !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll new file mode 100644 index 
0000000000..0595325977 --- /dev/null +++ b/test/CodeGen/NVPTX/i1-global.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + + +; CHECK: .visible .global .align 1 .u8 mypred +@mypred = addrspace(1) global i1 true, align 1 + + +define void @foo(i1 %p, i32* %out) { + %ld = load i1 addrspace(1)* @mypred + %val = zext i1 %ld to i32 + store i32 %val, i32* %out + ret void +} + + +!nvvm.annotations = !{!0} +!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/i1-param.ll b/test/CodeGen/NVPTX/i1-param.ll new file mode 100644 index 0000000000..fabd61a25d --- /dev/null +++ b/test/CodeGen/NVPTX/i1-param.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +; Make sure predicate (i1) operands to kernels get expanded out to .u8 + +; CHECK: .entry foo +; CHECK: .param .u8 foo_param_0 +; CHECK: .param .u32 foo_param_1 +define void @foo(i1 %p, i32* %out) { + %val = zext i1 %p to i32 + store i32 %val, i32* %out + ret void +} + + +!nvvm.annotations = !{!0} +!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1} diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll index 8b0357be87..1676f20643 100644 --- a/test/CodeGen/NVPTX/intrinsics.ll +++ b/test/CodeGen/NVPTX/intrinsics.ll @@ -15,5 +15,12 @@ define ptx_device double @test_fabs(double %d) { ret double %x } +define float @test_nvvm_sqrt(float %a) { + %val = call float @llvm.nvvm.sqrt.f(float %a) + ret float %val +} + + declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) +declare float @llvm.nvvm.sqrt.f(float) diff --git a/test/CodeGen/NVPTX/refl1.ll 
b/test/CodeGen/NVPTX/refl1.ll new file mode 100644 index 0000000000..5a9dac152e --- /dev/null +++ b/test/CodeGen/NVPTX/refl1.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s + +; Function Attrs: nounwind +; CHECK: .entry foo +define void @foo(float* nocapture %a) #0 { + %val = load float* %a + %tan = tail call fastcc float @__nv_fast_tanf(float %val) + store float %tan, float* %a + ret void +} + +; Function Attrs: nounwind readnone +declare float @llvm.nvvm.sin.approx.ftz.f(float) #1 + +; Function Attrs: nounwind readnone +declare float @llvm.nvvm.cos.approx.ftz.f(float) #1 + +; Function Attrs: nounwind readnone +declare float @llvm.nvvm.div.approx.ftz.f(float, float) #1 + +; Function Attrs: alwaysinline inlinehint nounwind readnone +; CHECK: .func (.param .b32 func_retval0) __nv_fast_tanf +define internal fastcc float @__nv_fast_tanf(float %a) #2 { +entry: + %0 = tail call float @llvm.nvvm.sin.approx.ftz.f(float %a) + %1 = tail call float @llvm.nvvm.cos.approx.ftz.f(float %a) + %2 = tail call float @llvm.nvvm.div.approx.ftz.f(float %0, float %1) + ret float %2 +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { alwaysinline inlinehint nounwind readnone } + +!nvvm.annotations = !{!0} + +!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1} diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll new file mode 100644 index 0000000000..92570c3152 --- /dev/null +++ b/test/CodeGen/R600/bfe_uint.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @bfe_def +; CHECK: BFE_UINT +define void @bfe_def(i32 addrspace(1)* %out, i32 %x) { +entry: + %0 = lshr i32 %x, 5 + %1 = and i32 %0, 15 ; 0xf + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; This program could be implemented using a BFE_UINT instruction, however +; since the lshr constant + number of bits in the mask is >= 32, it can also be +; implemented with a LSHR 
instruction, which is better, because LSHR has fewer +; operands and requires fewer constants. + +; CHECK: @bfe_shift +; CHECK-NOT: BFE_UINT +define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) { +entry: + %0 = lshr i32 %x, 16 + %1 = and i32 %0, 65535 ; 0xffff + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll new file mode 100644 index 0000000000..7278e90398 --- /dev/null +++ b/test/CodeGen/R600/mul.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; mul24 and mad24 are affected +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = mul <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll new file mode 100644 index 0000000000..c8040a1b4c --- /dev/null +++ b/test/CodeGen/R600/r600-encoding.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600-CHECK %s + +; The earliest R600 GPUs have a slightly different encoding than the rest of +; the VLIW4/5 GPUs. 
+ +; EG-CHECK: @test +; EG-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}] + +; R600-CHECK: @test +; R600-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] + +define void @test() { +entry: + %0 = call float @llvm.R600.load.input(i32 0) + %1 = call float @llvm.R600.load.input(i32 1) + %2 = fmul float %0, %1 + call void @llvm.AMDGPU.store.output(float %2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll new file mode 100644 index 0000000000..972542d346 --- /dev/null +++ b/test/CodeGen/R600/sra.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @ashr_v4i32 +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) { + %result = ashr <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll new file mode 100644 index 0000000000..12bfba3975 --- /dev/null +++ b/test/CodeGen/R600/sub.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { 
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = sub <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll new file mode 100644 index 0000000000..6e459df847 --- /dev/null +++ b/test/CodeGen/R600/vselect.ll @@ -0,0 +1,17 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @test_select_v4i32 +; CHECK: CNDE_INT T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) { +entry: + %0 = load <4 x i32> addrspace(1)* %in0 + %1 = load <4 x i32> addrspace(1)* %in1 + %cmp = icmp ne <4 x i32> %0, %1 + %result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1 + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/X86/compact-unwind.ll b/test/CodeGen/X86/compact-unwind.ll new file mode 100644 index 0000000000..8c4fa27da5 --- /dev/null +++ b/test/CodeGen/X86/compact-unwind.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -disable-cfi -disable-fp-elim -mtriple x86_64-apple-darwin11 | FileCheck %s + +%ty = type { i8* } + +@gv = external global i32 + +; This is aligning the stack with a push of a random register. +; CHECK: pushq %rax + +; Even though we can't encode %rax into the compact unwind, we still want to be +; able to generate a compact unwind encoding in this particular case. 
+; +; CHECK: __LD,__compact_unwind +; CHECK: _foo ## Range Start +; CHECK: 16842753 ## Compact Unwind Encoding: 0x1010001 + +define i8* @foo(i64 %size) { + %addr = alloca i64, align 8 + %tmp20 = load i32* @gv, align 4 + %tmp21 = call i32 @bar() + %tmp25 = load i64* %addr, align 8 + %tmp26 = inttoptr i64 %tmp25 to %ty* + %tmp29 = getelementptr inbounds %ty* %tmp26, i64 0, i32 0 + %tmp34 = load i8** %tmp29, align 8 + %tmp35 = getelementptr inbounds i8* %tmp34, i64 %size + store i8* %tmp35, i8** %tmp29, align 8 + ret i8* null +} + +declare i32 @bar() diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index 85d8b2cea3..fd5c234bb1 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -67,7 +67,15 @@ define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind { } define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK: [[CONSTSEG:[A-Z0-9_]*]]: +; CHECK: .long 2147483648 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 2147483648 +; CHECK-NEXT: .long 0 ; CHECK: test7: +; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]] +; CHECK: pxor [[CONSTREG]] +; CHECK: pxor [[CONSTREG]] ; CHECK: pcmpgtd %xmm1 ; CHECK: pshufd $-96 ; CHECK: pcmpeqd @@ -83,6 +91,8 @@ define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind { define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK: test8: +; CHECK: pxor +; CHECK: pxor ; CHECK: pcmpgtd %xmm0 ; CHECK: pshufd $-96 ; CHECK: pcmpeqd @@ -98,6 +108,8 @@ define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind { define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK: test9: +; CHECK: pxor +; CHECK: pxor ; CHECK: pcmpgtd %xmm0 ; CHECK: pshufd $-96 ; CHECK: pcmpeqd @@ -115,6 +127,8 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind { define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK: test10: +; CHECK: pxor +; CHECK: pxor ; CHECK: pcmpgtd %xmm1 ; CHECK: pshufd $-96 ; CHECK: pcmpeqd @@ -131,9 +145,15 @@ 
define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind { } define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind { +; CHECK: [[CONSTSEG:[A-Z0-9_]*]]: +; CHECK: .long 2147483648 +; CHECK-NEXT: .long 2147483648 +; CHECK-NEXT: .long 2147483648 +; CHECK-NEXT: .long 2147483648 ; CHECK: test11: -; CHECK: pxor -; CHECK: pxor +; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]] +; CHECK: pxor [[CONSTREG]] +; CHECK: pxor [[CONSTREG]] ; CHECK: pcmpgtd %xmm1 ; CHECK: pshufd $-96 ; CHECK: pcmpeqd diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll index e7af892c10..620478a879 100644 --- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll +++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll @@ -1,5 +1,6 @@ ; RUN: llc -O0 %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t ; RUN: llvm-dwarfdump %t | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-apple-macosx10.7 | FileCheck %s -check-prefix=ASM ; rdar://13067005 ; CHECK: .debug_info contents: @@ -20,6 +21,11 @@ ; CHECK: file_names[ 1] 0 0x00000000 0x00000000 simple2.c ; CHECK-NOT: file_names +; PR15408 +; ASM: L__DWARF__debug_info_begin0: +; ASM: .long 0 ## DW_AT_stmt_list +; ASM: L__DWARF__debug_info_begin1: +; ASM: .long 0 ## DW_AT_stmt_list define i32 @test(i32 %a) nounwind uwtable ssp { entry: %a.addr = alloca i32, align 4 diff --git a/test/ExecutionEngine/MCJIT/eh.ll b/test/ExecutionEngine/MCJIT/eh.ll index 0c19b1bf2e..c2135736ad 100644 --- a/test/ExecutionEngine/MCJIT/eh.ll +++ b/test/ExecutionEngine/MCJIT/eh.ll @@ -1,5 +1,5 @@ ; RUN: %lli_mcjit %s -; XFAIL: arm, cygwin +; XFAIL: arm, cygwin, win32, mingw declare i8* @__cxa_allocate_exception(i64) declare void @__cxa_throw(i8*, i8*, i8*) declare i32 @__gxx_personality_v0(...) 
diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s index df075f85ec..a7e10020b6 100644 --- a/test/MC/AsmParser/exprs.s +++ b/test/MC/AsmParser/exprs.s @@ -45,6 +45,7 @@ k: check_expr 0 || 0, 0 check_expr 1 + 2 < 3 + 4, 1 check_expr 1 << 8 - 1, 128 + check_expr 3 * 9 - 2 * 9 + 1, 10 .set c, 10 check_expr c + 1, 11 diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll index 99b3a7d861..308a036316 100644 --- a/test/Transforms/LoopUnroll/scevunroll.ll +++ b/test/Transforms/LoopUnroll/scevunroll.ll @@ -66,13 +66,16 @@ exit2: ; SCEV properly unrolls multi-exit loops. ; +; SCEV cannot currently unroll this loop. +; It should ideally detect a trip count of 5. +; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops. ; CHECK: @multiExit -; CHECK: getelementptr i32* %base, i32 10 -; CHECK-NEXT: load i32* -; CHECK: br i1 false, label %l2.10, label %exit1 -; CHECK: l2.10: -; CHECK-NOT: br -; CHECK: ret i32 +; CHECKFIXME: getelementptr i32* %base, i32 10 +; CHECKFIXME-NEXT: load i32* +; CHECKFIXME: br i1 false, label %l2.10, label %exit1 +; CHECKFIXME: l2.10: +; CHECKFIXME-NOT: br +; CHECKFIXME: ret i32 define i32 @multiExit(i32* %base) nounwind { entry: br label %l1 @@ -170,3 +173,38 @@ for.body87: br label %for.body87 } +; PR16130: clang produces incorrect code with loop/expression at -O2 +; rdar:14036816 loop-unroll makes assumptions about undefined behavior +; +; The loop latch is assumed to exit after the first iteration because +; of the induction variable's NSW flag. However, the loop latch's +; equality test is skipped and the loop exits after the second +; iteration via the early exit. So loop unrolling cannot assume that +; the loop latch's exit count of zero is an upper bound on the number +; of iterations. 
+; +; CHECK: @nsw_latch +; CHECK: for.body: +; CHECK: %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ] +; CHECK: return: +; CHECK: %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ %b.03, %for.cond ] +define void @nsw_latch(i32* %a) nounwind { +entry: + br label %for.body + +for.body: ; preds = %for.cond, %entry + %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ] + %tobool = icmp eq i32 %b.03, 0 + %add = add nsw i32 %b.03, 8 + br i1 %tobool, label %for.cond, label %return + +for.cond: ; preds = %for.body + %cmp = icmp eq i32 %add, 13 + br i1 %cmp, label %return, label %for.body + +return: ; preds = %for.body, %for.cond + %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ %b.03, %for.cond ] + %retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + store i32 %b.03.lcssa, i32* %a, align 4 + ret void +} diff --git a/test/Transforms/LoopUnroll/unloop.ll b/test/Transforms/LoopUnroll/unloop.ll index 5a9cacda44..9a938cc287 100644 --- a/test/Transforms/LoopUnroll/unloop.ll +++ b/test/Transforms/LoopUnroll/unloop.ll @@ -21,8 +21,8 @@ outer: inner: %iv = phi i32 [ 0, %outer ], [ %inc, %tail ] %inc = add i32 %iv, 1 - %wbucond = call zeroext i1 @check() - br i1 %wbucond, label %outer.backedge, label %tail + call zeroext i1 @check() + br i1 true, label %outer.backedge, label %tail tail: br i1 false, label %inner, label %exit @@ -126,25 +126,27 @@ return: ; Ensure that only the middle loop is removed and rely on verify-loopinfo to ; check soundness. ; -; CHECK: @unloopDeepNested +; This test must be disabled until trip count computation can be optimized... +; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops. +; CHECKFIXME: @unloopDeepNested ; Inner-inner loop control. 
-; CHECK: while.cond.us.i: -; CHECK: br i1 %cmp.us.i, label %next_data.exit, label %while.body.us.i -; CHECK: if.then.us.i: -; CHECK: br label %while.cond.us.i +; CHECKFIXME: while.cond.us.i: +; CHECKFIXME: br i1 %cmp.us.i, label %next_data.exit, label %while.body.us.i +; CHECKFIXME: if.then.us.i: +; CHECKFIXME: br label %while.cond.us.i ; Inner loop tail. -; CHECK: if.else.i: -; CHECK: br label %while.cond.outer.i +; CHECKFIXME: if.else.i: +; CHECKFIXME: br label %while.cond.outer.i ; Middle loop control (removed). -; CHECK: valid_data.exit: -; CHECK-NOT: br -; CHECK: %cmp = call zeroext i1 @check() +; CHECKFIXME: valid_data.exit: +; CHECKFIXME-NOT: br +; CHECKFIXME: %cmp = call zeroext i1 @check() ; Outer loop control. -; CHECK: copy_data.exit: -; CHECK: br i1 %cmp38, label %if.then39, label %while.cond.outer +; CHECKFIXME: copy_data.exit: +; CHECKFIXME: br i1 %cmp38, label %if.then39, label %while.cond.outer ; Outer-outer loop tail. -; CHECK: while.cond.outer.outer.backedge: -; CHECK: br label %while.cond.outer.outer +; CHECKFIXME: while.cond.outer.outer.backedge: +; CHECKFIXME: br label %while.cond.outer.outer define void @unloopDeepNested() nounwind { for.cond8.preheader.i: %cmp113.i = call zeroext i1 @check() diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll index 06b3b08aa0..de6be54849 100644 --- a/test/Transforms/LoopVectorize/lcssa-crash.ll +++ b/test/Transforms/LoopVectorize/lcssa-crash.ll @@ -27,3 +27,14 @@ for.end.i.i.i: unreachable } +; PR16139 +define void @test2(i8* %x) { +entry: + indirectbr i8* %x, [ label %L0, label %L1 ] + +L0: + br label %L0 + +L1: + ret void +} diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll new file mode 100644 index 0000000000..6f0357c5e5 --- /dev/null +++ b/test/Transforms/LoopVectorize/no_outside_user.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | 
FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" + +@f = common global i32 0, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +@c = common global i32 0, align 4 +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@e = common global i32 0, align 4 + +; We used to vectorize this loop. But it has a value that is used outside of the +; loop and is not a recognized reduction variable "tmp17". + +; CHECK-NOT: <2 x i32> + +define i32 @main() { +bb: + %b.promoted = load i32* @b, align 4 + br label %.lr.ph.i + +.lr.ph.i: + %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ] + %tmp2 = icmp sgt i32 %tmp8, 10 + br i1 %tmp2, label %bb16, label %bb10 + +bb10: + br label %bb16 + +bb16: + %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ] + %tmp18 = add nsw i32 %tmp8, 1 + %tmp19 = icmp slt i32 %tmp18, 4 + br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit + +f1.exit.loopexit: + %.lcssa = phi i32 [ %tmp17, %bb16 ] + ret i32 %.lcssa +} + + diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll new file mode 100644 index 0000000000..f376656f07 --- /dev/null +++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll @@ -0,0 +1,50 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; PR16073 + +; Because we were caching value pointers across a function call that could RAUW +; we would generate an undefined value store below: +; SCEVExpander::expandCodeFor would change a value (the start value of an +; induction) that we cached in the induction variable list. 
+ +; CHECK: test_vh +; CHECK-NOT: store <4 x i8> undef + +define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) { +entry: + br label %loop + +loop: + %inc = phi i32 [ %sub267, %entry ], [ %add, %loop] + %ext.inc = sext i32 %inc to i64 + %add.ptr265 = getelementptr inbounds i32* %ptr265, i64 %ext.inc + %add.ptr266 = getelementptr inbounds i32* %ptr266, i64 %ext.inc + %add = add i32 %inc, 9 + %cmp = icmp slt i32 %add, 140 + br i1 %cmp, label %block1, label %loop + +block1: + %sub267.lcssa = phi i32 [ %add, %loop ] + %add.ptr266.lcssa = phi i32* [ %add.ptr266, %loop ] + %add.ptr265.lcssa = phi i32* [ %add.ptr265, %loop ] + %tmp29 = bitcast i32* %add.ptr265.lcssa to i8* + %tmp30 = bitcast i32* %add.ptr266.lcssa to i8* + br label %do.body272 + +do.body272: + %row_width.5 = phi i32 [ %sub267.lcssa, %block1 ], [ %dec, %do.body272 ] + %sp.4 = phi i8* [ %tmp30, %block1 ], [ %incdec.ptr273, %do.body272 ] + %dp.addr.4 = phi i8* [ %tmp29, %block1 ], [ %incdec.ptr274, %do.body272 ] + %incdec.ptr273 = getelementptr inbounds i8* %sp.4, i64 1 + %tmp31 = load i8* %sp.4, align 1 + %incdec.ptr274 = getelementptr inbounds i8* %dp.addr.4, i64 1 + store i8 %tmp31, i8* %dp.addr.4, align 1 + %dec = add i32 %row_width.5, -1 + %cmp276 = icmp eq i32 %dec, 0 + br i1 %cmp276, label %loop.exit, label %do.body272 + +loop.exit: + ret void +} |