diff options
author | Evan Cheng <evan.cheng@apple.com> | 2011-11-15 02:12:34 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2011-11-15 02:12:34 +0000 |
commit | eaa192af18677c4dc5894e049514d8a6b1d6d7c2 (patch) | |
tree | dc9dd55a7dd8a5f11f9e547107584e4fa1c408ca | |
parent | bfc9429c2b814469adf3930dda31539d1c3319d8 (diff) | |
download | llvm-eaa192af18677c4dc5894e049514d8a6b1d6d7c2.tar.gz llvm-eaa192af18677c4dc5894e049514d8a6b1d6d7c2.tar.bz2 llvm-eaa192af18677c4dc5894e049514d8a6b1d6d7c2.tar.xz |
Add vmov.f32 to materialize f32 immediate splats that cannot be handled by the
integer vmov/vmvn variants. rdar://10437054
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144608 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 11 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 14 | ||||
-rw-r--r-- | test/CodeGen/ARM/vmov.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/machine-licm.ll | 5 | ||||
-rw-r--r-- | utils/TableGen/EDEmitter.cpp | 1 |
6 files changed, 49 insertions, 3 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b55ef700f5..e35a570b5e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -912,6 +912,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; + case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; @@ -3986,6 +3987,16 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } + + // Use vmov.f32 to materialize other v2f32 and v4f32 splats. + if (VT == MVT::v2f32 || VT == MVT::v4f32) { + ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0)); + int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF()); + if (ImmVal != -1) { + SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); + return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); + } + } } } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index be6a53032c..2cca40b374 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -146,6 +146,9 @@ namespace llvm { VMOVIMM, VMVNIMM, + // Vector move f32 immediate: + VMOVFPIMM, + // Vector duplicate: VDUP, VDUPLANE, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 49cc254a11..b1fadfb55a 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -39,6 +39,10 @@ def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } +def nImmVMOVF32 : Operand<i32> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; 
+} def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } def nImmSplatI64 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -173,6 +177,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; @@ -4513,6 +4518,15 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "", [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; + +def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>; +def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index ab56e5bb29..0396a41a35 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -353,3 +353,21 @@ define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind { store <4 x i16> %tmp2, <4 x i16>* %b, align 8 ret void } + +; Use vmov.f32 to materialize f32 immediate splats +; rdar://10437054 +define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind { +entry: +;CHECK: v_mov_v2f32: +;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01 + store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4 + ret void +} + +define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind { +entry: 
+;CHECK: v_mov_v4f32: +;CHECK: vmov.f32 q{{.*}}, #3.100000e+01 + store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4 + ret void +} diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index 46937fc84b..4e16c9aa3b 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -51,12 +51,11 @@ return: ; preds = %bb, %entry define void @t2(i8* %ptr1, i8* %ptr2) nounwind { entry: ; CHECK: t2: -; CHECK: mov.w [[R3:r[0-9]+]], #1065353216 -; CHECK: vdup.32 q{{.*}}, [[R3]] +; CHECK: vmov.f32 q{{.*}}, #1.000000e+00 br i1 undef, label %bb1, label %bb2 bb1: -; CHECK-NEXT: %bb1 +; CHECK: %bb1 %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] %tmp1 = shl i32 %indvar, 2 %gep1 = getelementptr i8* %ptr1, i32 %tmp1 diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index effb71ba53..1953dadbdc 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -607,6 +607,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type, IMM("nImmSplatI32"); IMM("nImmSplatI64"); IMM("nImmVMOVI32"); + IMM("nImmVMOVF32"); IMM("imm0_7"); IMM("imm0_15"); IMM("imm0_255"); |