summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2011-02-11 02:28:55 +0000
committerEvan Cheng <evan.cheng@apple.com>2011-02-11 02:28:55 +0000
commitc143dd4f63889ca6b4f656200f43a1fa7bbf1c34 (patch)
tree860b9364d858aeeced923401ee1cdc3bb3699bf0 /lib
parent98311ecb4ae9c82baba9e3a48acf756a81c8e9a4 (diff)
downloadllvm-c143dd4f63889ca6b4f656200f43a1fa7bbf1c34.tar.gz
llvm-c143dd4f63889ca6b4f656200f43a1fa7bbf1c34.tar.bz2
llvm-c143dd4f63889ca6b4f656200f43a1fa7bbf1c34.tar.xz
Fix buggy fcopysign lowering.
This define float @foo(float %x, float %y) nounwind readnone { entry: %0 = tail call float @copysignf(float %x, float %y) nounwind readnone ret float %0 } Was compiled to: vmov s0, r1 bic r0, r0, #-2147483648 vmov s1, r0 vcmpe.f32 s0, #0 vmrs apsr_nzcv, fpscr it lt vneglt.f32 s1, s1 vmov r0, s1 bx lr This fails to copy the sign of -0.0f because it's lost during the float to int conversion. Also, it's sub-optimal when the inputs are in GPR registers. Now it uses integer and + or operations when it's profitable. And it's correct! lsrs r1, r1, #31 bfi r0, r1, #31, #1 bx lr rdar://8984306 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125357 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp44
1 files changed, 39 insertions, 5 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4afc8c7fac..fe39860271 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2833,12 +2833,46 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
- SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
- SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
- SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
+ bool F2IisFast = Subtarget->isCortexA9() ||
+ Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // If float to int conversion isn't going to be super expensive, then simply
+ // or in the signbit.
+ if (F2IisFast) {
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ }
+
+ // Remove the signbit of operand 0.
+ Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
+
+ // If operand 1 signbit is one, then negate operand 0.
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
+ ISD::SETLT, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{