summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorJohn McCall <rjmccall@apple.com>2010-07-29 07:53:27 +0000
committerJohn McCall <rjmccall@apple.com>2010-07-29 07:53:27 +0000
commit3dd706b5288b83967968287c0950480948e8c3f6 (patch)
treed728285bb8fad9053a3786ec32e18e9c992d2bd0 /tools
parentdade28ee4eeaa9d22dac986666de4005e1309a06 (diff)
downloadllvm-3dd706b5288b83967968287c0950480948e8c3f6.tar.gz
llvm-3dd706b5288b83967968287c0950480948e8c3f6.tar.bz2
llvm-3dd706b5288b83967968287c0950480948e8c3f6.tar.xz
Add the llvm-diff tool, which performs a relatively naive structural
diff of a function. There's a lot of cruft in the current version, and it's pretty far from perfect, but it's usable. Currently only capable of comparing functions. Currently ignores metadata. Currently ignores most attributes of functions and instructions. Patches welcome. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109739 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools')
-rw-r--r--tools/llvm-diff/CMakeLists.txt6
-rw-r--r--tools/llvm-diff/DifferenceEngine.cpp600
-rw-r--r--tools/llvm-diff/DifferenceEngine.h171
-rw-r--r--tools/llvm-diff/Makefile17
-rw-r--r--tools/llvm-diff/llvm-diff.cpp301
5 files changed, 1095 insertions, 0 deletions
diff --git a/tools/llvm-diff/CMakeLists.txt b/tools/llvm-diff/CMakeLists.txt
new file mode 100644
index 0000000000..f6d65c947a
--- /dev/null
+++ b/tools/llvm-diff/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_LINK_COMPONENTS support asmparser bitreader)
+
+add_llvm_tool(llvm-diff
+ llvm-diff.cpp
+ DifferenceEngine.cpp
+ )
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
new file mode 100644
index 0000000000..400ab9fcb5
--- /dev/null
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -0,0 +1,600 @@
+//===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the interface to the LLVM difference engine,
+// which structurally compares functions within a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include <utility>
+
+#include <llvm/ADT/DenseMap.h>
+#include <llvm/ADT/DenseSet.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/StringSet.h>
+
+#include <llvm/Module.h>
+#include <llvm/Function.h>
+#include <llvm/Instructions.h>
+#include <llvm/Support/CFG.h>
+
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/type_traits.h>
+#include <llvm/Support/ErrorHandling.h>
+#include <llvm/Support/CallSite.h>
+
+#include "DifferenceEngine.h"
+
+using namespace llvm;
+
+namespace {
+
+/// A priority queue, implemented as a heap.
+template <class T, class Sorter, unsigned InlineCapacity>
+class PriorityQueue {
+ Sorter Precedes;
+ llvm::SmallVector<T, InlineCapacity> Storage;
+
+public:
+ PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
+
+ /// Checks whether the heap is empty.
+ bool empty() const { return Storage.empty(); }
+
+ /// Insert a new value on the heap.
+ void insert(const T &V) {
+ unsigned Index = Storage.size();
+ Storage.push_back(V);
+ if (Index == 0) return;
+
+ T *data = Storage.data();
+ while (true) {
+ unsigned Target = (Index + 1) / 2 - 1;
+ if (!Precedes(data[Index], data[Target])) return;
+ std::swap(data[Index], data[Target]);
+ if (Target == 0) return;
+ Index = Target;
+ }
+ }
+
+ /// Remove the minimum value in the heap. Only valid on a non-empty heap.
+ T remove_min() {
+ assert(!empty());
+ T tmp = Storage[0];
+
+ unsigned NewSize = Storage.size() - 1;
+ if (NewSize) {
+ // Move the slot at the end to the beginning.
+ if (isPodLike<T>::value)
+ Storage[0] = Storage[NewSize];
+ else
+ std::swap(Storage[0], Storage[NewSize]);
+
+ // Bubble the root up as necessary.
+ unsigned Index = 0;
+ while (true) {
+ // With a 1-based index, the children would be Index*2 and Index*2+1.
+ unsigned R = (Index + 1) * 2;
+ unsigned L = R - 1;
+
+ // If R is out of bounds, we're done after this in any case.
+ if (R >= NewSize) {
+ // If L is also out of bounds, we're done immediately.
+ if (L >= NewSize) break;
+
+ // Otherwise, test whether we should swap L and Index.
+ if (Precedes(Storage[L], Storage[Index]))
+ std::swap(Storage[L], Storage[Index]);
+ break;
+ }
+
+ // Otherwise, we need to compare with the smaller of L and R.
+ // Prefer R because it's closer to the end of the array.
+ unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
+
+ // If Index is >= the min of L and R, then heap ordering is restored.
+ if (!Precedes(Storage[IndexToTest], Storage[Index]))
+ break;
+
+ // Otherwise, keep bubbling up.
+ std::swap(Storage[IndexToTest], Storage[Index]);
+ Index = IndexToTest;
+ }
+ }
+ Storage.pop_back();
+
+ return tmp;
+ }
+};
+
+/// A function-scope difference engine.
+class FunctionDifferenceEngine {
+ DifferenceEngine &Engine;
+
+ /// The current mapping from old local values to new local values.
+ DenseMap<Value*, Value*> Values;
+
+ /// The current mapping from old blocks to new blocks.
+ DenseMap<BasicBlock*, BasicBlock*> Blocks;
+
+ DenseSet<std::pair<Value*, Value*> > TentativeValuePairs;
+
+ unsigned getUnprocPredCount(BasicBlock *Block) const {
+ unsigned Count = 0;
+ for (pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E; ++I)
+ if (!Blocks.count(*I)) Count++;
+ return Count;
+ }
+
+ typedef std::pair<BasicBlock*, BasicBlock*> BlockPair;
+
+ /// A type which sorts a priority queue by the number of unprocessed
+ /// predecessor blocks it has remaining.
+ ///
+ /// This is actually really expensive to calculate.
+ struct QueueSorter {
+ const FunctionDifferenceEngine &fde;
+ explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
+
+ bool operator()(const BlockPair &Old, const BlockPair &New) {
+ return fde.getUnprocPredCount(Old.first)
+ < fde.getUnprocPredCount(New.first);
+ }
+ };
+
+ /// A queue of unified blocks to process.
+ PriorityQueue<BlockPair, QueueSorter, 20> Queue;
+
+ /// Try to unify the given two blocks. Enqueues them for processing
+ /// if they haven't already been processed.
+ ///
+ /// Returns true if there was a problem unifying them.
+ bool tryUnify(BasicBlock *L, BasicBlock *R) {
+ BasicBlock *&Ref = Blocks[L];
+
+ if (Ref) {
+ if (Ref == R) return false;
+
+ Engine.logf("successor %s cannot be equivalent to %s; "
+ "it's already equivalent to %s")
+ << L << R << Ref;
+ return true;
+ }
+
+ Ref = R;
+ Queue.insert(BlockPair(L, R));
+ return false;
+ }
+
+ void processQueue() {
+ while (!Queue.empty()) {
+ BlockPair Pair = Queue.remove_min();
+ diff(Pair.first, Pair.second);
+ }
+ }
+
+ void diff(BasicBlock *L, BasicBlock *R) {
+ DifferenceEngine::Context C(Engine, L, R);
+
+ BasicBlock::iterator LI = L->begin(), LE = L->end();
+ BasicBlock::iterator RI = R->begin(), RE = R->end();
+
+ do {
+ assert(LI != LE && RI != RE);
+ Instruction *LeftI = &*LI, *RightI = &*RI;
+
+ // If the instructions differ, start the more sophisticated diff
+ // algorithm here.
+ if (diff(LeftI, RightI, false, true))
+ return runBlockDiff(LI, RI);
+
+ // Otherwise, unify them.
+ if (!LeftI->use_empty())
+ Values[LeftI] = RightI;
+
+ ++LI, ++RI;
+ } while (LI != LE); // This is sufficient: we can't get equality of
+ // terminators if there are residual instructions.
+ }
+
+ bool matchForBlockDiff(Instruction *L, Instruction *R);
+ void runBlockDiff(BasicBlock::iterator LI, BasicBlock::iterator RI);
+
+ bool diffCallSites(CallSite L, CallSite R, bool Complain) {
+ // FIXME: call attributes
+ if (!equivalentAsOperands(L.getCalledValue(), R.getCalledValue())) {
+ if (Complain) Engine.log("called functions differ");
+ return true;
+ }
+ if (L.arg_size() != R.arg_size()) {
+ if (Complain) Engine.log("argument counts differ");
+ return true;
+ }
+ for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
+ if (!equivalentAsOperands(L.getArgument(I), R.getArgument(I))) {
+ if (Complain)
+ Engine.logf("arguments %s and %s differ")
+ << L.getArgument(I) << R.getArgument(I);
+ return true;
+ }
+ return false;
+ }
+
+ bool diff(Instruction *L, Instruction *R, bool Complain, bool TryUnify) {
+ // FIXME: metadata (if Complain is set)
+
+ // Different opcodes always imply different operations.
+ if (L->getOpcode() != R->getOpcode()) {
+ if (Complain) Engine.log("different instruction types");
+ return true;
+ }
+
+ if (isa<CmpInst>(L)) {
+ if (cast<CmpInst>(L)->getPredicate()
+ != cast<CmpInst>(R)->getPredicate()) {
+ if (Complain) Engine.log("different predicates");
+ return true;
+ }
+ } else if (isa<CallInst>(L)) {
+ return diffCallSites(CallSite(L), CallSite(R), Complain);
+ } else if (isa<PHINode>(L)) {
+ // FIXME: implement.
+
+ // This is really wierd; type uniquing is broken?
+ if (L->getType() != R->getType()) {
+ if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) {
+ if (Complain) Engine.log("different phi types");
+ return true;
+ }
+ }
+ return false;
+
+ // Terminators.
+ } else if (isa<InvokeInst>(L)) {
+ InvokeInst *LI = cast<InvokeInst>(L);
+ InvokeInst *RI = cast<InvokeInst>(R);
+ if (diffCallSites(CallSite(LI), CallSite(RI), Complain))
+ return true;
+
+ if (TryUnify) {
+ tryUnify(LI->getNormalDest(), RI->getNormalDest());
+ tryUnify(LI->getUnwindDest(), RI->getUnwindDest());
+ }
+ return false;
+
+ } else if (isa<BranchInst>(L)) {
+ BranchInst *LI = cast<BranchInst>(L);
+ BranchInst *RI = cast<BranchInst>(R);
+ if (LI->isConditional() != RI->isConditional()) {
+ if (Complain) Engine.log("branch conditionality differs");
+ return true;
+ }
+
+ if (LI->isConditional()) {
+ if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+ if (Complain) Engine.log("branch conditions differ");
+ return true;
+ }
+ if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
+ }
+ if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
+ return false;
+
+ } else if (isa<SwitchInst>(L)) {
+ SwitchInst *LI = cast<SwitchInst>(L);
+ SwitchInst *RI = cast<SwitchInst>(R);
+ if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+ if (Complain) Engine.log("switch conditions differ");
+ return true;
+ }
+ if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
+
+ bool Difference = false;
+
+ DenseMap<ConstantInt*,BasicBlock*> LCases;
+ for (unsigned I = 1, E = LI->getNumCases(); I != E; ++I)
+ LCases[LI->getCaseValue(I)] = LI->getSuccessor(I);
+ for (unsigned I = 1, E = RI->getNumCases(); I != E; ++I) {
+ ConstantInt *CaseValue = RI->getCaseValue(I);
+ BasicBlock *LCase = LCases[CaseValue];
+ if (LCase) {
+ if (TryUnify) tryUnify(LCase, RI->getSuccessor(I));
+ LCases.erase(CaseValue);
+ } else if (!Difference) {
+ if (Complain)
+ Engine.logf("right switch has extra case %s") << CaseValue;
+ Difference = true;
+ }
+ }
+ if (!Difference)
+ for (DenseMap<ConstantInt*,BasicBlock*>::iterator
+ I = LCases.begin(), E = LCases.end(); I != E; ++I) {
+ if (Complain)
+ Engine.logf("left switch has extra case %s") << I->first;
+ Difference = true;
+ }
+ return Difference;
+ } else if (isa<UnreachableInst>(L)) {
+ return false;
+ }
+
+ if (L->getNumOperands() != R->getNumOperands()) {
+ if (Complain) Engine.log("instructions have different operand counts");
+ return true;
+ }
+
+ for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
+ Value *LO = L->getOperand(I), *RO = R->getOperand(I);
+ if (!equivalentAsOperands(LO, RO)) {
+ if (Complain) Engine.logf("operands %s and %s differ") << LO << RO;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool equivalentAsOperands(Constant *L, Constant *R) {
+ // Use equality as a preliminary filter.
+ if (L == R)
+ return true;
+
+ if (L->getValueID() != R->getValueID())
+ return false;
+
+ // Ask the engine about global values.
+ if (isa<GlobalValue>(L))
+ return Engine.equivalentAsOperands(cast<GlobalValue>(L),
+ cast<GlobalValue>(R));
+
+ // Compare constant expressions structurally.
+ if (isa<ConstantExpr>(L))
+ return equivalentAsOperands(cast<ConstantExpr>(L),
+ cast<ConstantExpr>(R));
+
+ // Nulls of the "same type" don't always actually have the same
+ // type; I don't know why. Just white-list them.
+ if (isa<ConstantPointerNull>(L))
+ return true;
+
+ // Block addresses only match if we've already encountered the
+ // block. FIXME: tentative matches?
+ if (isa<BlockAddress>(L))
+ return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
+ == cast<BlockAddress>(R)->getBasicBlock();
+
+ return false;
+ }
+
+ bool equivalentAsOperands(ConstantExpr *L, ConstantExpr *R) {
+ if (L == R)
+ return true;
+ if (L->getOpcode() != R->getOpcode())
+ return false;
+
+ switch (L->getOpcode()) {
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ if (L->getPredicate() != R->getPredicate())
+ return false;
+ break;
+
+ case Instruction::GetElementPtr:
+ // FIXME: inbounds?
+ break;
+
+ default:
+ break;
+ }
+
+ if (L->getNumOperands() != R->getNumOperands())
+ return false;
+
+ for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I)
+ if (!equivalentAsOperands(L->getOperand(I), R->getOperand(I)))
+ return false;
+
+ return true;
+ }
+
+ bool equivalentAsOperands(Value *L, Value *R) {
+ // Fall out if the values have different kind.
+ // This possibly shouldn't take priority over oracles.
+ if (L->getValueID() != R->getValueID())
+ return false;
+
+ // Value subtypes: Argument, Constant, Instruction, BasicBlock,
+ // InlineAsm, MDNode, MDString, PseudoSourceValue
+
+ if (isa<Constant>(L))
+ return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
+
+ if (isa<Instruction>(L))
+ return Values[L] == R || TentativeValuePairs.count(std::make_pair(L, R));
+
+ if (isa<Argument>(L))
+ return Values[L] == R;
+
+ if (isa<BasicBlock>(L))
+ return Blocks[cast<BasicBlock>(L)] != R;
+
+ // Pretend everything else is identical.
+ return true;
+ }
+
+ // Avoid a gcc warning about accessing 'this' in an initializer.
+ FunctionDifferenceEngine *this_() { return this; }
+
+public:
+ FunctionDifferenceEngine(DifferenceEngine &Engine) :
+ Engine(Engine), Queue(QueueSorter(*this_())) {}
+
+ void diff(Function *L, Function *R) {
+ if (L->arg_size() != R->arg_size())
+ Engine.log("different argument counts");
+
+ // Map the arguments.
+ for (Function::arg_iterator
+ LI = L->arg_begin(), LE = L->arg_end(),
+ RI = R->arg_begin(), RE = R->arg_end();
+ LI != LE && RI != RE; ++LI, ++RI)
+ Values[&*LI] = &*RI;
+
+ tryUnify(&*L->begin(), &*R->begin());
+ processQueue();
+ }
+};
+
+struct DiffEntry {
+ DiffEntry() : Cost(0) {}
+
+ unsigned Cost;
+ llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
+};
+
+bool FunctionDifferenceEngine::matchForBlockDiff(Instruction *L,
+ Instruction *R) {
+ return !diff(L, R, false, false);
+}
+
+void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
+ BasicBlock::iterator RStart) {
+ BasicBlock::iterator LE = LStart->getParent()->end();
+ BasicBlock::iterator RE = RStart->getParent()->end();
+
+ unsigned NL = std::distance(LStart, LE);
+
+ SmallVector<DiffEntry, 20> Paths1(NL+1);
+ SmallVector<DiffEntry, 20> Paths2(NL+1);
+
+ DiffEntry *Cur = Paths1.data();
+ DiffEntry *Next = Paths2.data();
+
+ assert(TentativeValuePairs.empty());
+
+ // Initialize the first column.
+ for (unsigned I = 0; I != NL+1; ++I) {
+ Cur[I].Cost = I;
+ for (unsigned J = 0; J != I; ++J)
+ Cur[I].Path.push_back(DifferenceEngine::DC_left);
+ }
+
+ for (BasicBlock::iterator RI = RStart; RI != RE; ++RI) {
+ // Initialize the first row.
+ Next[0] = Cur[0];
+ Next[0].Path.push_back(DifferenceEngine::DC_right);
+
+ unsigned Index = 1;
+ for (BasicBlock::iterator LI = LStart; LI != LE; ++LI, ++Index) {
+ if (matchForBlockDiff(&*LI, &*RI)) {
+ Next[Index] = Cur[Index-1];
+ Next[Index].Path.push_back(DifferenceEngine::DC_match);
+ TentativeValuePairs.insert(std::make_pair(&*LI, &*RI));
+ } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
+ Next[Index] = Next[Index-1];
+ Next[Index].Path.push_back(DifferenceEngine::DC_left);
+ } else {
+ Next[Index] = Cur[Index];
+ Next[Index].Path.push_back(DifferenceEngine::DC_right);
+ }
+ }
+
+ std::swap(Cur, Next);
+ }
+
+ SmallVectorImpl<char> &Path = Cur[NL].Path;
+ BasicBlock::iterator LI = LStart, RI = RStart;
+
+ DifferenceEngine::DiffLogBuilder Diff(Engine);
+
+ // Drop trailing matches.
+ while (Path.back() == DifferenceEngine::DC_match)
+ Path.pop_back();
+
+ for (SmallVectorImpl<char>::iterator
+ PI = Path.begin(), PE = Path.end(); PI != PE; ++PI) {
+ switch (static_cast<DifferenceEngine::DiffChange>(*PI)) {
+ case DifferenceEngine::DC_match:
+ assert(LI != LE && RI != RE);
+ {
+ Instruction *L = &*LI, *R = &*RI;
+ DifferenceEngine::Context C(Engine, L, R);
+ diff(L, R, false, true);
+ Diff.addMatch(L, R);
+ }
+ ++LI; ++RI;
+ break;
+
+ case DifferenceEngine::DC_left:
+ assert(LI != LE);
+ Diff.addLeft(&*LI);
+ ++LI;
+ break;
+
+ case DifferenceEngine::DC_right:
+ assert(RI != RE);
+ Diff.addRight(&*RI);
+ ++RI;
+ break;
+ }
+ }
+
+ TentativeValuePairs.clear();
+}
+
+}
+
+void DifferenceEngine::diff(Function *L, Function *R) {
+ Context C(*this, L, R);
+
+ // FIXME: types
+ // FIXME: attributes and CC
+ // FIXME: parameter attributes
+
+ // If both are declarations, we're done.
+ if (L->empty() && R->empty())
+ return;
+ else if (L->empty())
+ log("left function is declaration, right function is definition");
+ else if (R->empty())
+ log("right function is declaration, left function is definition");
+ else
+ FunctionDifferenceEngine(*this).diff(L, R);
+}
+
+void DifferenceEngine::diff(Module *L, Module *R) {
+ StringSet<> LNames;
+ SmallVector<std::pair<Function*,Function*>, 20> Queue;
+
+ for (Module::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Function *LFn = &*I;
+ LNames.insert(LFn->getName());
+
+ if (Function *RFn = R->getFunction(LFn->getName()))
+ Queue.push_back(std::make_pair(LFn, RFn));
+ else
+ logf("function %s exists only in left module") << LFn;
+ }
+
+ for (Module::iterator I = R->begin(), E = R->end(); I != E; ++I) {
+ Function *RFn = &*I;
+ if (!LNames.count(RFn->getName()))
+ logf("function %s exists only in right module") << RFn;
+ }
+
+ for (SmallVectorImpl<std::pair<Function*,Function*> >::iterator
+ I = Queue.begin(), E = Queue.end(); I != E; ++I)
+ diff(I->first, I->second);
+}
+
+bool DifferenceEngine::equivalentAsOperands(GlobalValue *L, GlobalValue *R) {
+ if (globalValueOracle) return (*globalValueOracle)(L, R);
+ return L->getName() == R->getName();
+}
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
new file mode 100644
index 0000000000..9d0ebecc53
--- /dev/null
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -0,0 +1,171 @@
+//===-- DifferenceEngine.h - Module comparator ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the interface to the LLVM difference engine,
+// which structurally compares functions within a module.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LLVM_DIFFERENCE_ENGINE_H_
+#define _LLVM_DIFFERENCE_ENGINE_H_
+
+#include <utility>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringRef.h>
+
+namespace llvm {
+ class LLVMContext;
+ class Module;
+ class Function;
+ class Twine;
+ class Value;
+ class GlobalValue;
+
+ /// A class for performing structural comparisons of LLVM assembly.
+ class DifferenceEngine {
+ public:
+ /// A temporary-object class for building up log messages.
+ class LogBuilder {
+ DifferenceEngine &Engine;
+
+ /// The use of a stored StringRef here is okay because
+ /// LogBuilder should be used only as a temporary, and as a
+ /// temporary it will be destructed before whatever temporary
+ /// might be initializing this format.
+ StringRef Format;
+
+ SmallVector<Value*, 4> Arguments;
+
+ public:
+ LogBuilder(DifferenceEngine &Engine, StringRef Format)
+ : Engine(Engine), Format(Format) {}
+
+ LogBuilder &operator<<(Value *V) {
+ Arguments.push_back(V);
+ return *this;
+ }
+
+ ~LogBuilder() {
+ Engine.consumer.logf(*this);
+ }
+
+ StringRef getFormat() const { return Format; }
+
+ unsigned getNumArguments() const { return Arguments.size(); }
+ Value *getArgument(unsigned I) const { return Arguments[I]; }
+ };
+
+ enum DiffChange { DC_match, DC_left, DC_right };
+
+ /// A temporary-object class for building up diff messages.
+ class DiffLogBuilder {
+ typedef std::pair<Instruction*,Instruction*> DiffRecord;
+ SmallVector<DiffRecord, 20> Diff;
+
+ DifferenceEngine &Engine;
+
+ public:
+ DiffLogBuilder(DifferenceEngine &Engine) : Engine(Engine) {}
+ ~DiffLogBuilder() { Engine.consumer.logd(*this); }
+
+ void addMatch(Instruction *L, Instruction *R) {
+ Diff.push_back(DiffRecord(L, R));
+ }
+ void addLeft(Instruction *L) { Diff.push_back(DiffRecord(L, 0)); }
+ void addRight(Instruction *R) { Diff.push_back(DiffRecord(0, R)); }
+
+ unsigned getNumLines() const { return Diff.size(); }
+ DiffChange getLineKind(unsigned I) const {
+ return (Diff[I].first ? (Diff[I].second ? DC_match : DC_left)
+ : DC_right);
+ }
+ Instruction *getLeft(unsigned I) const { return Diff[I].first; }
+ Instruction *getRight(unsigned I) const { return Diff[I].second; }
+ };
+
+ /// The interface for consumers of difference data.
+ struct Consumer {
+ /// Record that a local context has been entered. Left and
+ /// Right are IR "containers" of some sort which are being
+ /// considered for structural equivalence: global variables,
+ /// functions, blocks, instructions, etc.
+ virtual void enterContext(Value *Left, Value *Right) = 0;
+
+ /// Record that a local context has been exited.
+ virtual void exitContext() = 0;
+
+ /// Record a difference within the current context.
+ virtual void log(StringRef Text) = 0;
+
+ /// Record a formatted difference within the current context.
+ virtual void logf(const LogBuilder &Log) = 0;
+
+ /// Record a line-by-line instruction diff.
+ virtual void logd(const DiffLogBuilder &Log) = 0;
+
+ protected:
+ ~Consumer() {}
+ };
+
+ /// A RAII object for recording the current context.
+ struct Context {
+ Context(DifferenceEngine &Engine, Value *L, Value *R) : Engine(Engine) {
+ Engine.consumer.enterContext(L, R);
+ }
+
+ ~Context() {
+ Engine.consumer.exitContext();
+ }
+
+ private:
+ DifferenceEngine &Engine;
+ };
+
+ /// An oracle for answering whether two values are equivalent as
+ /// operands.
+ struct Oracle {
+ virtual bool operator()(Value *L, Value *R) = 0;
+
+ protected:
+ ~Oracle() {}
+ };
+
+ DifferenceEngine(LLVMContext &context, Consumer &consumer)
+ : context(context), consumer(consumer), globalValueOracle(0) {}
+
+ void diff(Module *L, Module *R);
+ void diff(Function *L, Function *R);
+
+ void log(StringRef text) {
+ consumer.log(text);
+ }
+
+ LogBuilder logf(StringRef text) {
+ return LogBuilder(*this, text);
+ }
+
+ /// Installs an oracle to decide whether two global values are
+ /// equivalent as operands. Without an oracle, global values are
+ /// considered equivalent as operands precisely when they have the
+ /// same name.
+ void setGlobalValueOracle(Oracle *oracle) {
+ globalValueOracle = oracle;
+ }
+
+ /// Determines whether two global values are equivalent.
+ bool equivalentAsOperands(GlobalValue *L, GlobalValue *R);
+
+ private:
+ LLVMContext &context;
+ Consumer &consumer;
+ Oracle *globalValueOracle;
+ };
+}
+
+#endif
diff --git a/tools/llvm-diff/Makefile b/tools/llvm-diff/Makefile
new file mode 100644
index 0000000000..58e49fa959
--- /dev/null
+++ b/tools/llvm-diff/Makefile
@@ -0,0 +1,17 @@
+##===- tools/llvm-diff/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = llvm-diff
+LINK_COMPONENTS := asmparser bitreader
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
new file mode 100644
index 0000000000..7c2a5d66c3
--- /dev/null
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -0,0 +1,301 @@
+#include <string>
+#include <utility>
+
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/DenseMap.h>
+
+// Required to parse .ll files.
+#include <llvm/Support/SourceMgr.h>
+#include <llvm/Assembly/Parser.h>
+
+// Required to parse .bc files.
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/LLVMContext.h>
+#include <llvm/Module.h>
+#include <llvm/Type.h>
+#include <llvm/Instructions.h>
+
+#include "DifferenceEngine.h"
+
+using namespace llvm;
+
+/// Reads a module from a file. If the filename ends in .ll, it is
+/// interpreted as an assembly file; otherwise, it is interpreted as
+/// bitcode. On error, messages are written to stderr and null is
+/// returned.
+static Module *ReadModule(LLVMContext &Context, StringRef Name) {
+ // LLVM assembly path.
+ if (Name.endswith(".ll")) {
+ SMDiagnostic Diag;
+ Module *M = ParseAssemblyFile(Name, Diag, Context);
+ if (M) return M;
+
+ Diag.Print("llvmdiff", errs());
+ return 0;
+ }
+
+ // Bitcode path.
+ MemoryBuffer *Buffer = MemoryBuffer::getFile(Name);
+
+ // ParseBitcodeFile takes ownership of the buffer if it succeeds.
+ std::string Error;
+ Module *M = ParseBitcodeFile(Buffer, Context, &Error);
+ if (M) return M;
+
+ errs() << "error parsing " << Name << ": " << Error;
+ delete Buffer;
+ return 0;
+}
+
+static int usage() {
+ errs() << "expected usage:\n";
+ errs() << " llvm-diff oldmodule.ll newmodule.ll [function list]\n";
+ errs() << "Assembly or bitcode modules may be used interchangeably.\n";
+ errs() << "If no functions are provided, all functions will be compared.\n";
+ return 1;
+}
+
+namespace {
+struct DiffContext {
+ DiffContext(Value *L, Value *R)
+ : L(L), R(R), Differences(false), IsFunction(isa<Function>(L)) {}
+ Value *L;
+ Value *R;
+ bool Differences;
+ bool IsFunction;
+ DenseMap<Value*,unsigned> LNumbering;
+ DenseMap<Value*,unsigned> RNumbering;
+};
+
+void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering) {
+ unsigned BBN = 0;
+ unsigned IN = 0;
+
+ // Arguments get the first numbers.
+ for (Function::arg_iterator
+ AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI)
+ if (!AI->hasName())
+ Numbering[&*AI] = IN++;
+
+ // Walk the basic blocks in order.
+ for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) {
+ // Basic blocks have their own 'namespace'.
+ if (!FI->hasName())
+ Numbering[&*FI] = BBN++;
+
+ // Walk the instructions in order.
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI)
+ // void instructions don't get numbers.
+ if (!BI->hasName() && !BI->getType()->isVoidTy())
+ Numbering[&*BI] = IN++;
+ }
+
+ assert(!Numbering.empty() && "asked for numbering but numbering was no-op");
+}
+
+class DiffConsumer : public DifferenceEngine::Consumer {
+private:
+ Module *LModule;
+ Module *RModule;
+ SmallVector<DiffContext, 5> contexts;
+ bool Differences;
+ unsigned Indent;
+
+ void printValue(Value *V, bool isL) {
+ if (V->hasName()) {
+ errs() << (isa<GlobalValue>(V) ? '@' : '%') << V->getName();
+ return;
+ }
+ if (V->getType()->isVoidTy()) {
+ if (isa<StoreInst>(V)) {
+ errs() << "store to ";
+ printValue(cast<StoreInst>(V)->getPointerOperand(), isL);
+ } else if (isa<CallInst>(V)) {
+ errs() << "call to ";
+ printValue(cast<CallInst>(V)->getCalledValue(), isL);
+ } else if (isa<InvokeInst>(V)) {
+ errs() << "invoke to ";
+ printValue(cast<InvokeInst>(V)->getCalledValue(), isL);
+ } else {
+ errs() << *V;
+ }
+ return;
+ }
+
+ unsigned N = contexts.size();
+ while (N > 0) {
+ --N;
+ DiffContext &ctxt = contexts[N];
+ if (!ctxt.IsFunction) continue;
+ if (isL) {
+ if (ctxt.LNumbering.empty())
+ ComputeNumbering(cast<Function>(ctxt.L), ctxt.LNumbering);
+ errs() << '%' << ctxt.LNumbering[V];
+ return;
+ } else {
+ if (ctxt.RNumbering.empty())
+ ComputeNumbering(cast<Function>(ctxt.R), ctxt.RNumbering);
+ errs() << '%' << ctxt.RNumbering[V];
+ return;
+ }
+ }
+
+ errs() << "<anonymous>";
+ }
+
+ void header() {
+ if (contexts.empty()) return;
+ for (SmallVectorImpl<DiffContext>::iterator
+ I = contexts.begin(), E = contexts.end(); I != E; ++I) {
+ if (I->Differences) continue;
+ if (isa<Function>(I->L)) {
+ // Extra newline between functions.
+ if (Differences) errs() << "\n";
+
+ Function *L = cast<Function>(I->L);
+ Function *R = cast<Function>(I->R);
+ if (L->getName() != R->getName())
+ errs() << "in function " << L->getName() << " / " << R->getName() << ":\n";
+ else
+ errs() << "in function " << L->getName() << ":\n";
+ } else if (isa<BasicBlock>(I->L)) {
+ BasicBlock *L = cast<BasicBlock>(I->L);
+ BasicBlock *R = cast<BasicBlock>(I->R);
+ errs() << " in block ";
+ printValue(L, true);
+ errs() << " / ";
+ printValue(R, false);
+ errs() << ":\n";
+ } else if (isa<Instruction>(I->L)) {
+ errs() << " in instruction ";
+ printValue(I->L, true);
+ errs() << " / ";
+ printValue(I->R, false);
+ errs() << ":\n";
+ }
+
+ I->Differences = true;
+ }
+ }
+
+ void indent() {
+ unsigned N = Indent;
+ while (N--) errs() << ' ';
+ }
+
+public:
+ DiffConsumer(Module *L, Module *R)
+ : LModule(L), RModule(R), Differences(false), Indent(0) {}
+
+ bool hadDifferences() const { return Differences; }
+
+ void enterContext(Value *L, Value *R) {
+ contexts.push_back(DiffContext(L, R));
+ Indent += 2;
+ }
+ void exitContext() {
+ Differences |= contexts.back().Differences;
+ contexts.pop_back();
+ Indent -= 2;
+ }
+
+ void log(StringRef text) {
+ header();
+ indent();
+ errs() << text << "\n";
+ }
+
+ void logf(const DifferenceEngine::LogBuilder &Log) {
+ header();
+ indent();
+
+ // FIXME: we don't know whether these are l-values or r-values (ha!)
+ // Print them in some saner way!
+ errs() << Log.getFormat() << "\n";
+ for (unsigned I = 0, E = Log.getNumArguments(); I != E; ++I)
+ Log.getArgument(I)->dump();
+ }
+
+ void logd(const DifferenceEngine::DiffLogBuilder &Log) {
+ header();
+
+ for (unsigned I = 0, E = Log.getNumLines(); I != E; ++I) {
+ indent();
+ switch (Log.getLineKind(I)) {
+ case DifferenceEngine::DC_match:
+ errs() << " ";
+ Log.getLeft(I)->dump();
+ //printValue(Log.getLeft(I), true);
+ break;
+ case DifferenceEngine::DC_left:
+ errs() << "< ";
+ Log.getLeft(I)->dump();
+ //printValue(Log.getLeft(I), true);
+ break;
+ case DifferenceEngine::DC_right:
+ errs() << "> ";
+ Log.getRight(I)->dump();
+ //printValue(Log.getRight(I), false);
+ break;
+ }
+ //errs() << "\n";
+ }
+ }
+
+};
+}
+
+int main(int argc, const char **argv) {
+ if (argc < 3) return usage();
+
+ // Don't make StringRef locals like this at home.
+ StringRef LModuleFile = argv[1];
+ StringRef RModuleFile = argv[2];
+
+ LLVMContext Context;
+
+ // Load both modules. Die if that fails.
+ Module *LModule = ReadModule(Context, LModuleFile);
+ Module *RModule = ReadModule(Context, RModuleFile);
+ if (!LModule || !RModule) return 1;
+
+ DiffConsumer Consumer(LModule, RModule);
+ DifferenceEngine Engine(Context, Consumer);
+
+ // If any function names were given, just diff those.
+ const char **FnNames = argv + 3;
+ unsigned NumFnNames = argc - 3;
+ if (NumFnNames) {
+ for (unsigned I = 0; I != NumFnNames; ++I) {
+ StringRef FnName = FnNames[I];
+
+ // Drop leading sigils from the function name.
+ if (FnName.startswith("@")) FnName = FnName.substr(1);
+
+ Function *LFn = LModule->getFunction(FnName);
+ Function *RFn = RModule->getFunction(FnName);
+ if (LFn && RFn)
+ Engine.diff(LFn, RFn);
+ else {
+ if (!LFn && !RFn)
+ errs() << "No function named @" << FnName << " in either module\n";
+ else if (!LFn)
+ errs() << "No function named @" << FnName << " in left module\n";
+ else
+ errs() << "No function named @" << FnName << " in right module\n";
+ }
+ }
+ } else {
+ // Otherwise, diff all functions in the modules.
+ Engine.diff(LModule, RModule);
+ }
+
+ delete LModule;
+ delete RModule;
+
+ return Consumer.hadDifferences();
+}