summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2008-03-24 21:52:23 +0000
committerEvan Cheng <evan.cheng@apple.com>2008-03-24 21:52:23 +0000
commit62a3f1538cf50f0373c2a5eeb440d6288604f969 (patch)
treede5c3aaf38da4e7959c9eb52eb8ed4a68f88a42d
parentaec960038920c206505268eb2e8f0849364124fe (diff)
downloadllvm-62a3f1538cf50f0373c2a5eeb440d6288604f969.tar.gz
llvm-62a3f1538cf50f0373c2a5eeb440d6288604f969.tar.bz2
llvm-62a3f1538cf50f0373c2a5eeb440d6288604f969.tar.xz
- SSE4.1 extractps extracts an f32 into a GR32 register. Very useful! Not. Fix the instruction specification and teach the lowering code to use it only when the only use is a store instruction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48746 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp22
-rw-r--r--lib/Target/X86/X86InstrSSE.td13
-rw-r--r--test/CodeGen/X86/vec_extract-sse4.ll30
3 files changed, 57 insertions, 8 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dd2d7849d7..1d72e1f6c6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -699,7 +699,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
@@ -3718,6 +3718,19 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+ } else if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store.
+ if (!Op.hasOneUse())
+ return SDOperand();
+ SDNode *User = *Op.Val->use_begin();
+ if (User->getOpcode() != ISD::STORE)
+ return SDOperand();
+ SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
}
return SDOperand();
}
@@ -3728,8 +3741,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
- if (Subtarget->hasSSE41())
- return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Subtarget->hasSSE41()) {
+ SDOperand Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.Val)
+ return Res;
+ }
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 787414b10f..9a3b2f67b1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3380,19 +3380,22 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
-/// SS41I_extractf32 - SSE 4.1 extract 32 bits to fp reg or memory destination
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs FR32:$dst),
+ // Not worth matching to rr form of extractps since the result is in GPR32.
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set FR32:$dst,
- (extractelt (v4f32 VR128:$src1), imm:$src2))]>, OpSize;
+ [/*(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>,
+ OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (v4f32 VR128:$src1), imm:$src2),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
addr:$dst)]>, OpSize;
}
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
new file mode 100644
index 0000000000..1ef5e8803e
--- /dev/null
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: grep extractps %t | count 1
+; RUN: grep pextrd %t | count 2
+; RUN: grep pshufd %t | count 1
+
+define void @t1(float* %R, <4 x float>* %P1) {
+ %X = load <4 x float>* %P1
+ %tmp = extractelement <4 x float> %X, i32 3
+ store float %tmp, float* %R
+ ret void
+}
+
+define float @t2(<4 x float>* %P1) {
+ %X = load <4 x float>* %P1
+ %tmp = extractelement <4 x float> %X, i32 2
+ ret float %tmp
+}
+
+define void @t3(i32* %R, <4 x i32>* %P1) {
+ %X = load <4 x i32>* %P1
+ %tmp = extractelement <4 x i32> %X, i32 3
+ store i32 %tmp, i32* %R
+ ret void
+}
+
+define i32 @t4(<4 x i32>* %P1) {
+ %X = load <4 x i32>* %P1
+ %tmp = extractelement <4 x i32> %X, i32 3
+ ret i32 %tmp
+}