diff options
-rw-r--r-- | include/llvm/IR/IntrinsicsARM64.td | 12 | ||||
-rw-r--r-- | lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 11 | ||||
-rw-r--r-- | lib/Target/ARM64/ARM64ISelLowering.cpp | 20 | ||||
-rw-r--r-- | lib/Target/ARM64/ARM64InstrAtomics.td | 80 | ||||
-rw-r--r-- | lib/Target/ARM64/ARM64TargetMachine.cpp | 5 | ||||
-rw-r--r-- | test/CodeGen/ARM64/ldxr-stxr.ll | 127 |
6 files changed, 248 insertions, 7 deletions
diff --git a/include/llvm/IR/IntrinsicsARM64.td b/include/llvm/IR/IntrinsicsARM64.td index d7f307e9ff..abdb655b12 100644 --- a/include/llvm/IR/IntrinsicsARM64.td +++ b/include/llvm/IR/IntrinsicsARM64.td @@ -14,12 +14,18 @@ let TargetPrefix = "arm64" in { def int_arm64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; +def int_arm64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; def int_arm64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; -def int_arm64_clrex : Intrinsic<[]>; +def int_arm64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; def int_arm64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; -def int_arm64_stxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, - llvm_ptr_ty]>; +def int_arm64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; +def int_arm64_stxp : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; +def int_arm64_stlxp : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; + +def int_arm64_clrex : Intrinsic<[]>; def int_arm64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index 956f61148e..9e5b5af0d9 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -1969,12 +1969,15 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { switch (IntNo) { default: break; + case Intrinsic::arm64_ldaxp: case Intrinsic::arm64_ldxp: { + unsigned Op = + IntNo == Intrinsic::arm64_ldaxp ? ARM64::LDAXPX : ARM64::LDXPX; SDValue MemAddr = Node->getOperand(2); SDLoc DL(Node); SDValue Chain = Node->getOperand(0); - SDNode *Ld = CurDAG->getMachineNode(ARM64::LDXPX, DL, MVT::i64, MVT::i64, + SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, MVT::Other, MemAddr, Chain); // Transfer memoperands. 
@@ -1983,7 +1986,10 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); return Ld; } + case Intrinsic::arm64_stlxp: case Intrinsic::arm64_stxp: { + unsigned Op = + IntNo == Intrinsic::arm64_stlxp ? ARM64::STLXPX : ARM64::STXPX; SDLoc DL(Node); SDValue Chain = Node->getOperand(0); SDValue ValLo = Node->getOperand(2); @@ -1997,8 +2003,7 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { Ops.push_back(MemAddr); Ops.push_back(Chain); - SDNode *St = - CurDAG->getMachineNode(ARM64::STXPX, DL, MVT::i32, MVT::Other, Ops); + SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index aae018d4e2..794bd02942 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -551,8 +551,22 @@ void ARM64TargetLowering::computeMaskedBitsForTargetNode( KnownOne &= KnownOne2; break; } - case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_W_CHAIN: { + ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); + Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); + switch (IntID) { + default: return; + case Intrinsic::arm64_ldaxr: + case Intrinsic::arm64_ldxr: { + unsigned BitWidth = KnownOne.getBitWidth(); + EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + return; + } + } break; + } case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -5972,6 +5986,7 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm64_ldaxr: case 
Intrinsic::arm64_ldxr: { PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -5984,6 +5999,7 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::arm64_stlxr: case Intrinsic::arm64_stxr: { PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -5996,6 +6012,7 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm64_ldaxp: case Intrinsic::arm64_ldxp: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; @@ -6007,6 +6024,7 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::arm64_stlxp: case Intrinsic::arm64_stxp: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/ARM64/ARM64InstrAtomics.td index 0d36e067a5..296f8d216f 100644 --- a/lib/Target/ARM64/ARM64InstrAtomics.td +++ b/lib/Target/ARM64/ARM64InstrAtomics.td @@ -242,6 +242,39 @@ def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff), def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff), (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>; +// Load-acquire-exclusives.
+ +def ldaxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldaxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldaxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def ldaxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +def : Pat<(ldaxr_1 am_noindex:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRB am_noindex:$addr), sub_32)>; +def : Pat<(ldaxr_2 am_noindex:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRH am_noindex:$addr), sub_32)>; +def : Pat<(ldaxr_4 am_noindex:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRW am_noindex:$addr), sub_32)>; +def : Pat<(ldaxr_8 am_noindex:$addr), (LDAXRX am_noindex:$addr)>; + +def : Pat<(and (ldaxr_1 am_noindex:$addr), 0xff), + (SUBREG_TO_REG (i64 0), (LDAXRB am_noindex:$addr), sub_32)>; +def : Pat<(and (ldaxr_2 am_noindex:$addr), 0xffff), + (SUBREG_TO_REG (i64 0), (LDAXRH am_noindex:$addr), sub_32)>; +def : Pat<(and (ldaxr_4 am_noindex:$addr), 0xffffffff), + (SUBREG_TO_REG (i64 0), (LDAXRW am_noindex:$addr), sub_32)>; + // Store-exclusives. def stxr_1 : PatFrag<(ops node:$val, node:$ptr), @@ -264,6 +297,7 @@ def stxr_8 : PatFrag<(ops node:$val, node:$ptr), return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; }]>; + def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr), (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr), @@ -287,6 +321,52 @@ def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr), def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr), (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +// Store-release-exclusives. 
+ +def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), + (int_arm64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + + +def : Pat<(stlxr_1 GPR64:$val, am_noindex:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +def : Pat<(stlxr_2 GPR64:$val, am_noindex:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +def : Pat<(stlxr_4 GPR64:$val, am_noindex:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +def : Pat<(stlxr_8 GPR64:$val, am_noindex:$addr), + (STLXRX GPR64:$val, am_noindex:$addr)>; + +def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr), + (STLXRB GPR32:$val, am_noindex:$addr)>; +def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr), + (STLXRH GPR32:$val, am_noindex:$addr)>; +def : Pat<(stlxr_4 (zext GPR32:$val), am_noindex:$addr), + (STLXRW GPR32:$val, am_noindex:$addr)>; + +def : Pat<(stlxr_1 (and GPR64:$val, 0xff), am_noindex:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; +def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>; + // And clear exclusive. 
diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp index f4a7996379..946a0ee59e 100644 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ b/lib/Target/ARM64/ARM64TargetMachine.cpp @@ -111,6 +111,11 @@ bool ARM64PassConfig::addPreISel() { addPass(createGlobalMergePass(TM)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createARM64AddressTypePromotionPass()); + + // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg + // ourselves. + addPass(createAtomicExpandLoadLinkedPass(TM)); + return false; } diff --git a/test/CodeGen/ARM64/ldxr-stxr.ll b/test/CodeGen/ARM64/ldxr-stxr.ll index d50ba949b1..ed53a14ca8 100644 --- a/test/CodeGen/ARM64/ldxr-stxr.ll +++ b/test/CodeGen/ARM64/ldxr-stxr.ll @@ -141,3 +141,130 @@ define void @test_clear() { declare void @llvm.arm64.clrex() nounwind +define i128 @test_load_acquire_i128(i8* %p) nounwind readonly { +; CHECK-LABEL: test_load_acquire_i128: +; CHECK: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0] +entry: + %ldrexd = tail call %0 @llvm.arm64.ldaxp(i8* %p) + %0 = extractvalue %0 %ldrexd, 1 + %1 = extractvalue %0 %ldrexd, 0 + %2 = zext i64 %0 to i128 + %3 = zext i64 %1 to i128 + %shl = shl nuw i128 %2, 64 + %4 = or i128 %shl, %3 + ret i128 %4 +} + +define i32 @test_store_release_i128(i8* %ptr, i128 %val) nounwind { +; CHECK-LABEL: test_store_release_i128: +; CHECK: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0] +entry: + %tmp4 = trunc i128 %val to i64 + %tmp6 = lshr i128 %val, 64 + %tmp7 = trunc i128 %tmp6 to i64 + %strexd = tail call i32 @llvm.arm64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr) + ret i32 %strexd +} + +declare %0 @llvm.arm64.ldaxp(i8*) nounwind +declare i32 @llvm.arm64.stlxp(i64, i64, i8*) nounwind + +define void @test_load_acquire_i8(i8* %addr) { +; CHECK-LABEL: test_load_acquire_i8: +; CHECK: ldaxrb w[[LOADVAL:[0-9]+]], [x0] +; CHECK-NOT: uxtb +; CHECK-NOT: and +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] + + %val = call i64 @llvm.arm64.ldaxr.p0i8(i8* 
%addr) + %shortval = trunc i64 %val to i8 + %extval = zext i8 %shortval to i64 + store i64 %extval, i64* @var, align 8 + ret void +} + +define void @test_load_acquire_i16(i16* %addr) { +; CHECK-LABEL: test_load_acquire_i16: +; CHECK: ldaxrh w[[LOADVAL:[0-9]+]], [x0] +; CHECK-NOT: uxth +; CHECK-NOT: and +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] + + %val = call i64 @llvm.arm64.ldaxr.p0i16(i16* %addr) + %shortval = trunc i64 %val to i16 + %extval = zext i16 %shortval to i64 + store i64 %extval, i64* @var, align 8 + ret void +} + +define void @test_load_acquire_i32(i32* %addr) { +; CHECK-LABEL: test_load_acquire_i32: +; CHECK: ldaxr w[[LOADVAL:[0-9]+]], [x0] +; CHECK-NOT: uxtw +; CHECK-NOT: and +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] + + %val = call i64 @llvm.arm64.ldaxr.p0i32(i32* %addr) + %shortval = trunc i64 %val to i32 + %extval = zext i32 %shortval to i64 + store i64 %extval, i64* @var, align 8 + ret void +} + +define void @test_load_acquire_i64(i64* %addr) { +; CHECK-LABEL: test_load_acquire_i64: +; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0] +; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var] + + %val = call i64 @llvm.arm64.ldaxr.p0i64(i64* %addr) + store i64 %val, i64* @var, align 8 + ret void +} + + +declare i64 @llvm.arm64.ldaxr.p0i8(i8*) nounwind +declare i64 @llvm.arm64.ldaxr.p0i16(i16*) nounwind +declare i64 @llvm.arm64.ldaxr.p0i32(i32*) nounwind +declare i64 @llvm.arm64.ldaxr.p0i64(i64*) nounwind + +define i32 @test_store_release_i8(i32, i8 %val, i8* %addr) { +; CHECK-LABEL: test_store_release_i8: +; CHECK-NOT: uxtb +; CHECK-NOT: and +; CHECK: stlxrb w0, w1, [x2] + %extval = zext i8 %val to i64 + %res = call i32 @llvm.arm64.stlxr.p0i8(i64 %extval, i8* %addr) + ret i32 %res +} + +define i32 @test_store_release_i16(i32, i16 %val, i16* %addr) { +; CHECK-LABEL: test_store_release_i16: +; CHECK-NOT: uxth +; CHECK-NOT: and +; CHECK: stlxrh w0, w1, [x2] + %extval = zext i16 %val to i64 + %res = call i32 @llvm.arm64.stlxr.p0i16(i64 %extval, 
i16* %addr) + ret i32 %res +} + +define i32 @test_store_release_i32(i32, i32 %val, i32* %addr) { +; CHECK-LABEL: test_store_release_i32: +; CHECK-NOT: uxtw +; CHECK-NOT: and +; CHECK: stlxr w0, w1, [x2] + %extval = zext i32 %val to i64 + %res = call i32 @llvm.arm64.stlxr.p0i32(i64 %extval, i32* %addr) + ret i32 %res +} + +define i32 @test_store_release_i64(i32, i64 %val, i64* %addr) { +; CHECK-LABEL: test_store_release_i64: +; CHECK: stlxr w0, x1, [x2] + %res = call i32 @llvm.arm64.stlxr.p0i64(i64 %val, i64* %addr) + ret i32 %res +} + +declare i32 @llvm.arm64.stlxr.p0i8(i64, i8*) nounwind +declare i32 @llvm.arm64.stlxr.p0i16(i64, i16*) nounwind +declare i32 @llvm.arm64.stlxr.p0i32(i64, i32*) nounwind +declare i32 @llvm.arm64.stlxr.p0i64(i64, i64*) nounwind |