summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuergen Ributzka <juergen@apple.com>2014-04-08 20:39:59 +0000
committerJuergen Ributzka <juergen@apple.com>2014-04-08 20:39:59 +0000
commitc6a7502a803ceca5d18801a7598d303f7f5bfa0f (patch)
tree255a17bfcc26d3b8383de337cd66db03d91410f7
parent861e4db90533e6ba7de77c33d7a06857433616ff (diff)
downloadllvm-c6a7502a803ceca5d18801a7598d303f7f5bfa0f.tar.gz
llvm-c6a7502a803ceca5d18801a7598d303f7f5bfa0f.tar.bz2
llvm-c6a7502a803ceca5d18801a7598d303f7f5bfa0f.tar.xz
[Constant Hoisting][ARM64] Enable constant hoisting for ARM64.
This implements the target-hooks for ARM64 to enable constant hoisting. This fixes <rdar://problem/14774662> and <rdar://problem/16381500>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205791 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM64/ARM64TargetTransformInfo.cpp144
-rw-r--r--test/CodeGen/ARM64/const-addr.ll23
-rw-r--r--test/Transforms/ConstantHoisting/ARM64/const-addr.ll23
-rw-r--r--test/Transforms/ConstantHoisting/ARM64/lit.local.cfg3
4 files changed, 180 insertions, 13 deletions
diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
index 9b598d7656..8a6253d768 100644
--- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
+++ b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
using namespace llvm;
// Declare the pass initialization routine locally as target-specific passes
@@ -71,8 +72,12 @@ public:
/// \name Scalar TTI Implementations
/// @{
-
+ unsigned getIntImmCost(int64_t Val) const;
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
/// @}
@@ -128,26 +133,139 @@ llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) {
return new ARM64TTI(TM);
}
+/// \brief Calculate the cost of materializing a 64-bit value. This helper
+/// method might only calculate a fraction of a larger immediate. Therefore it
+/// is valid to return a cost of ZERO.
+unsigned ARM64TTI::getIntImmCost(int64_t Val) const {
+ // Check if the immediate can be encoded within an instruction.
+ if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, 64))
+ return 0;
+
+ if (Val < 0)
+ Val = ~Val;
+
+ // Calculate how many moves we will need to materialize this constant.
+ unsigned LZ = countLeadingZeros((uint64_t)Val);
+ return (64 - LZ + 15) / 16;
+}
+
+/// \brief Calculate the cost of materializing the given constant.
unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
- if (BitSize == 0)
+ if (BitSize == 0 || BitSize > 128)
return ~0U;
- int64_t Val = Imm.getSExtValue();
- if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
- return 1;
+ // Sign-extend all constants to a multiple of 64-bit.
+ APInt ImmVal = Imm;
+ if (BitSize & 0x3f)
+ ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+
+ // Split the constant into 64-bit chunks and calculate the cost for each
+ // chunk.
+ unsigned Cost = 0;
+ for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
+ APInt Tmp = ImmVal.ashr(ShiftVal).getLoBits(64);
+ int64_t Val = Tmp.getSExtValue();
+ Cost += getIntImmCost(Val);
+ }
+ // We need at least one instruction to materialize the constant.
+ return std::max(1U, Cost);
+}
- if ((int64_t)Val < 0)
- Val = ~Val;
- if (BitSize == 32)
- Val &= (1LL << 32) - 1;
+unsigned ARM64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
- unsigned LZ = countLeadingZeros((uint64_t)Val);
- unsigned Shift = (63 - LZ) / 16;
- // MOVZ is free so return true for one or fewer MOVK.
- return (Shift == 0) ? 1 : Shift;
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 128)
+ return ~0U;
+
+ unsigned ImmIdx = ~0U;
+ switch (Opcode) {
+ default:
+ return TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr.
+ if (Idx == 0)
+ return 2 * TCC_Basic;
+ return TCC_Free;
+ case Instruction::Store:
+ ImmIdx = 0;
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ ImmIdx = 1;
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ break;
+ }
+
+ if (Idx == ImmIdx) {
+ unsigned NumConstants = (BitSize + 63) / 64;
+ unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TCC_Basic) ? TCC_Free : Cost;
+ }
+ return ARM64TTI::getIntImmCost(Imm, Ty);
+}
+
+unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 128)
+ return ~0U;
+
+ switch (IID) {
+ default:
+ return TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ if (Idx == 1) {
+ unsigned NumConstants = (BitSize + 63) / 64;
+ unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TCC_Basic) ? TCC_Free : Cost;
+ }
+ break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TCC_Free;
+ break;
+ }
+ return ARM64TTI::getIntImmCost(Imm, Ty);
}
ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const {
diff --git a/test/CodeGen/ARM64/const-addr.ll b/test/CodeGen/ARM64/const-addr.ll
new file mode 100644
index 0000000000..c77a6db8fe
--- /dev/null
+++ b/test/CodeGen/ARM64/const-addr.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=arm64-darwin-unknown < %s | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+
+; Test that the constant base address is materialized only once.
+define i32 @test1() nounwind {
+; CHECK-LABEL: test1
+; CHECK: movz x8, #1039, lsl #16
+; CHECK-NEXT: movk x8, #49152
+; CHECK-NEXT: ldp w9, w10, [x8, #4]
+; CHECK: ldr w8, [x8, #12]
+ %at = inttoptr i64 68141056 to %T*
+ %o1 = getelementptr %T* %at, i32 0, i32 1
+ %t1 = load i32* %o1
+ %o2 = getelementptr %T* %at, i32 0, i32 2
+ %t2 = load i32* %o2
+ %a1 = add i32 %t1, %t2
+ %o3 = getelementptr %T* %at, i32 0, i32 3
+ %t3 = load i32* %o3
+ %a2 = add i32 %a1, %t3
+ ret i32 %a2
+}
+
diff --git a/test/Transforms/ConstantHoisting/ARM64/const-addr.ll b/test/Transforms/ConstantHoisting/ARM64/const-addr.ll
new file mode 100644
index 0000000000..89d596055c
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/ARM64/const-addr.ll
@@ -0,0 +1,23 @@
+; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+
+define i32 @test1() nounwind {
+; CHECK-LABEL: test1
+; CHECK: %const = bitcast i64 68141056 to i64
+; CHECK: %1 = inttoptr i64 %const to %T*
+; CHECK: %o1 = getelementptr %T* %1, i32 0, i32 1
+; CHECK: %o2 = getelementptr %T* %1, i32 0, i32 2
+; CHECK: %o3 = getelementptr %T* %1, i32 0, i32 3
+ %at = inttoptr i64 68141056 to %T*
+ %o1 = getelementptr %T* %at, i32 0, i32 1
+ %t1 = load i32* %o1
+ %o2 = getelementptr %T* %at, i32 0, i32 2
+ %t2 = load i32* %o2
+ %a1 = add i32 %t1, %t2
+ %o3 = getelementptr %T* %at, i32 0, i32 3
+ %t3 = load i32* %o3
+ %a2 = add i32 %a1, %t3
+ ret i32 %a2
+}
+
diff --git a/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg b/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg
new file mode 100644
index 0000000000..84ac9811f0
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM64' in targets:
+ config.unsupported = True