diff options
author | Michael Liao <michael.liao@intel.com> | 2012-09-26 08:26:25 +0000 |
---|---|---|
committer | Michael Liao <michael.liao@intel.com> | 2012-09-26 08:26:25 +0000 |
commit | 4fa2ddbb94bb7b7f67e4f4d0aac292998fbf00ed (patch) | |
tree | 02f208c5c9f1e8d1620137e6ea020395e7006348 | |
parent | 6bcdb5b9034a02901175e94e89b4815f28f2f141 (diff) | |
download | llvm-4fa2ddbb94bb7b7f67e4f4d0aac292998fbf00ed.tar.gz llvm-4fa2ddbb94bb7b7f67e4f4d0aac292998fbf00ed.tar.bz2 llvm-4fa2ddbb94bb7b7f67e4f4d0aac292998fbf00ed.tar.xz |
Add SARX/SHRX/SHLX code generation support
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164675 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrShiftRotate.td | 55 | ||||
-rw-r--r-- | test/CodeGen/X86/phys_subreg_coalesce-3.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/shift-bmi2.ll | 178 | ||||
-rw-r--r-- | test/CodeGen/X86/targetLoweringGeneric.ll | 2 |
5 files changed, 241 insertions, 2 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2fb2ed2104..95d2ad4a24 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -565,6 +565,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // BMI/BMI2 foldable instructions { X86::RORX32ri, X86::RORX32mi, 0 }, { X86::RORX64ri, X86::RORX64mi, 0 }, + { X86::SARX32rr, X86::SARX32rm, 0 }, + { X86::SARX64rr, X86::SARX64rm, 0 }, + { X86::SHRX32rr, X86::SHRX32rm, 0 }, + { X86::SHRX64rr, X86::SHRX64rm, 0 }, + { X86::SHLX32rr, X86::SHLX32rm, 0 }, + { X86::SHLX64rr, X86::SHLX64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index fe7d0ecf89..893488c159 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -896,4 +896,59 @@ let Predicates = [HasBMI2] in { (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>; def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)), (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>; + + // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not + // immedidate shift, i.e. the following code is considered better + // + // mov %edi, %esi + // shl $imm, %esi + // ... %edi, ... + // + // than + // + // movb $imm, %sil + // shlx %sil, %edi, %esi + // ... %edi, ... + // + let AddedComplexity = 1 in { + def : Pat<(sra GR32:$src1, GR8:$src2), + (SARX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(sra GR64:$src1, GR8:$src2), + (SARX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(srl GR32:$src1, GR8:$src2), + (SHRX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(srl GR64:$src1, GR8:$src2), + (SHRX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(shl GR32:$src1, GR8:$src2), + (SHLX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(shl GR64:$src1, GR8:$src2), + (SHLX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + } + + // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor + // + // mov (%ecx), %esi + // shl $imm, $esi + // + // over + // + // movb $imm %al + // shlx %al, (%ecx), %esi + // + // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole + // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible. } diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll index 51320dd6d0..2a20e7ad6f 100644 --- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll +++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s ; rdar://5571034 ; This requires physreg joining, %vreg13 is live everywhere: diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll new file mode 100644 index 0000000000..d1f321f177 --- /dev/null +++ b/test/CodeGen/X86/shift-bmi2.ll @@ -0,0 +1,178 @@ +; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s + +define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone { +entry: + %shl = shl i32 %x, %shamt +; BMI2: shl32 +; BMI2: shlxl +; BMI2: ret +; BMI264: shl32 +; BMI264: shlxl +; BMI264: ret + ret i32 %shl +} + +define i32 @shl32i(i32 %x) nounwind uwtable readnone { +entry: + %shl = shl i32 %x, 5 +; BMI2: shl32i +; BMI2-NOT: shlxl +; BMI2: ret +; BMI264: shl32i +; BMI264-NOT: shlxl +; BMI264: ret + ret i32 %shl +} + +define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = shl i32 %x, %shamt +; BMI2: shl32p +; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI2: ret +; BMI264: shl32p +; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i32 %shl +} + +define i32 @shl32pi(i32* %p) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = shl i32 %x, 5 +; BMI2: shl32pi +; BMI2-NOT: shlxl +; BMI2: ret +; BMI264: shl32pi +; BMI264-NOT: shlxl +; BMI264: ret + ret i32 %shl +} + +define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone { +entry: + %shl = shl i64 %x, %shamt +; BMI264: shl64 +; BMI264: shlxq +; BMI264: ret + ret i64 %shl +} + +define i64 @shl64i(i64 %x) nounwind uwtable readnone { +entry: + %shl = shl i64 %x, 7 +; BMI264: shl64i +; BMI264-NOT: shlxq +; BMI264: ret + ret i64 %shl +} + +define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +entry: + %x = load i64* %p + %shl = shl i64 %x, %shamt +; BMI264: shl64p +; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i64 %shl +} + +define i64 @shl64pi(i64* %p) nounwind uwtable readnone { +entry: + %x = load i64* %p + %shl = shl i64 %x, 7 +; BMI264: shl64p +; BMI264-NOT: shlxq +; BMI264: ret + ret i64 %shl +} + +define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone { +entry: + %shl = lshr i32 %x, %shamt +; BMI2: lshr32 +; BMI2: shrxl +; BMI2: ret +; BMI264: lshr32 +; BMI264: shrxl +; BMI264: ret + ret i32 %shl +} + +define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = lshr i32 %x, %shamt +; BMI2: lshr32p +; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI2: ret +; BMI264: lshr32 +; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i32 %shl +} + +define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +entry: + %shl = lshr i64 %x, %shamt +; BMI264: lshr64 +; BMI264: shrxq +; BMI264: ret + ret i64 %shl +} + +define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +entry: + %x = load i64* %p + %shl = lshr i64 %x, %shamt +; BMI264: lshr64p +; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i64 %shl +} + +define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone { +entry: + %shl = ashr i32 %x, %shamt +; BMI2: ashr32 +; BMI2: sarxl +; BMI2: ret +; BMI264: ashr32 +; BMI264: sarxl +; BMI264: ret + ret i32 %shl +} + +define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone { +entry: + %x = load i32* %p + %shl = ashr i32 %x, %shamt +; BMI2: ashr32p +; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI2: ret +; BMI264: ashr32 +; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i32 %shl +} + +define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone { +entry: + %shl = ashr i64 %x, %shamt +; BMI264: ashr64 +; BMI264: sarxq +; BMI264: ret + ret i64 %shl +} + +define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { +entry: + %x = load i64* %p + %shl = ashr i64 %x, %shamt +; BMI264: ashr64p +; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: ret + ret i64 %shl +} diff --git a/test/CodeGen/X86/targetLoweringGeneric.ll b/test/CodeGen/X86/targetLoweringGeneric.ll index ba5f8f8361..a773e9daef 100644 --- a/test/CodeGen/X86/targetLoweringGeneric.ll +++ b/test/CodeGen/X86/targetLoweringGeneric.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s +; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=corei7 -fast-isel=false -O0 < %s | FileCheck %s ; Gather non-machine specific tests for the transformations in ; CodeGen/SelectionDAG/TargetLowering. Currently, these |