summaryrefslogtreecommitdiff
path: root/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
blob: 4c4150bfec90abc1830a0cb2e1b85267a695c7d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// appropriate (intrinsic-based) ldrex/strex loops.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "arm-atomic-expand"

namespace {
  class AtomicExpandLoadLinked : public FunctionPass {
    const TargetLowering *TLI;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) {
      initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;
    bool expandAtomicInsts(Function &F);

    bool expandAtomicLoad(LoadInst *LI);
    bool expandAtomicStore(StoreInst *LI);
    bool expandAtomicRMW(AtomicRMWInst *AI);
    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);

    AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
    void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
  };
}

char AtomicExpandLoadLinked::ID = 0;
char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
    "Expand Atomic calls in terms of load-linked & store-conditional",
    false, false)

FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
  return new AtomicExpandLoadLinked(TM);
}

bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
  if (!TLI)
    return false;

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (BasicBlock &BB : F)
    for (Instruction &Inst : BB) {
      if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
          (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
        AtomicInsts.push_back(&Inst);
    }

  bool MadeChange = false;
  for (Instruction *Inst : AtomicInsts) {
    if (!TLI->shouldExpandAtomicInIR(Inst))
      continue;

    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
      MadeChange |= expandAtomicRMW(AI);
    else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
      MadeChange |= expandAtomicCmpXchg(CI);
    else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      MadeChange |= expandAtomicLoad(LI);
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      MadeChange |= expandAtomicStore(SI);
    else
      llvm_unreachable("Unknown atomic instruction");
  }

  return MadeChange;
}

bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
  // Load instructions don't actually need a leading fence, even in the
  // SequentiallyConsistent case.
  AtomicOrdering MemOpOrder =
    TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();

  // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
  // an ldrexd (A3.5.3).
  IRBuilder<> Builder(LI);
  Value *Val =
      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);

  insertTrailingFence(Builder, LI->getOrdering());

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
  // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
  // we need a loop and the entire instruction is essentially an "atomicrmw
  // xchg" that ignores the value loaded.
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  return expandAtomicRMW(AI);
}

bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
  AtomicOrdering Order = AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     fence?
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     fence?
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB =  BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);

  Value *NewVal;
  switch (AI->getOperation()) {
  case AtomicRMWInst::Xchg:
    NewVal = AI->getValOperand();
    break;
  case AtomicRMWInst::Add:
    NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Sub:
    NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::And:
    NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Nand:
    NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
                               "new");
    break;
  case AtomicRMWInst::Or:
    NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Xor:
    NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  default:
    llvm_unreachable("Unknown atomic op");
  }

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  insertTrailingFence(Builder, Order);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();

  return true;
}

bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     fence?
  // cmpxchg.start:
  //     %loaded = @load.linked(%addr)
  //     %should_store = icmp eq %loaded, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.trystore:
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

  // This grabs the DebugLoc from CI
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  Value *ShouldStore =
      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");

  // If the the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess = TLI->emitStoreConditional(
      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : LoopBB);

  // Make sure later instructions don't get reordered with a fence if necessary.
  Builder.SetInsertPoint(SuccessBB);
  insertTrailingFence(Builder, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(FailureBB);
  insertTrailingFence(Builder, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.

  // Setup the builder so we can create any PHIs we need.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
                                                       AtomicOrdering Ord) {
  if (!TLI->getInsertFencesForAtomic())
    return Ord;

  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    Builder.CreateFence(Release);

  // The exclusive operations don't need any barrier if we're adding separate
  // fences.
  return Monotonic;
}

void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
                                              AtomicOrdering Ord) {
  if (!TLI->getInsertFencesForAtomic())
    return;

  if (Ord == Acquire || Ord == AcquireRelease)
    Builder.CreateFence(Acquire);
  else if (Ord == SequentiallyConsistent)
    Builder.CreateFence(SequentiallyConsistent);
}