diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2011-11-29 15:00:45 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2011-11-29 15:00:45 +0000 |
commit | f68b214e2d06849091b5ff3dd5d5efe29722dcc3 (patch) | |
tree | 230cfe7f111effc763ec9e060ea221f1a1589edf | |
parent | f267972d2864545f82f1eac7e922f85280a4d654 (diff) | |
download | llvm-f68b214e2d06849091b5ff3dd5d5efe29722dcc3.tar.gz llvm-f68b214e2d06849091b5ff3dd5d5efe29722dcc3.tar.bz2 llvm-f68b214e2d06849091b5ff3dd5d5efe29722dcc3.tar.xz |
Fixed vsqrt.ss intrinsic usage - order of input operands was wrong.
Added a test.
Thanks Bruno for reviewing the patch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145403 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-arith.ll | 11 |
2 files changed, 12 insertions, 1 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 97737d1bba..011d252ac7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2872,7 +2872,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins ssmem:$src1, VR128:$src2), + (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll index 59988ca8b6..4aa337033d 100644 --- a/test/CodeGen/X86/avx-arith.ll +++ b/test/CodeGen/X86/avx-arith.ll @@ -259,3 +259,14 @@ define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone { ret <4 x i64> %x } +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + +define <4 x float> @int_sqrt_ss() { +; CHECK: int_sqrt_ss +; CHECK: vsqrtss + %x0 = load float addrspace(1)* undef, align 8 + %x1 = insertelement <4 x float> undef, float %x0, i32 0 + %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind + ret <4 x float> %x2 +} + |