diff options
author | Bill Wendling <isanbard@gmail.com> | 2013-12-08 00:04:11 +0000 |
---|---|---|
committer | Bill Wendling <isanbard@gmail.com> | 2013-12-08 00:04:11 +0000 |
commit | 696e2d735c38eb73f1628d9b6166565fec4b9f1b (patch) | |
tree | a98a57f181e07d26a8b5610850a4616c129eb0d1 | |
parent | 42dbfb96fce69ba7cf38a1c9c59ffa503ef70e18 (diff) | |
download | clang-696e2d735c38eb73f1628d9b6166565fec4b9f1b.tar.gz clang-696e2d735c38eb73f1628d9b6166565fec4b9f1b.tar.bz2 clang-696e2d735c38eb73f1628d9b6166565fec4b9f1b.tar.xz |
Merging r196535:
------------------------------------------------------------------------
r196535 | apazos | 2013-12-05 13:13:24 -0800 (Thu, 05 Dec 2013) | 1 line
Implemented vget/vset_lane_f16 intrinsics
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_34@196686 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/arm_neon.td | 5 |
-rw-r--r-- | test/CodeGen/aarch64-neon-copy.c | 73 |
-rw-r--r-- | utils/TableGen/NeonEmitter.cpp | 35 |
3 files changed, 111 insertions, 2 deletions
diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 9097edc4e6..b918459f4e 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -133,6 +133,8 @@ def OP_SCALAR_QDMULH_LN : Op; def OP_SCALAR_QDMULH_LNQ : Op; def OP_SCALAR_QRDMULH_LN : Op; def OP_SCALAR_QRDMULH_LNQ : Op; +def OP_SCALAR_GET_LN : Op; +def OP_SCALAR_SET_LN : Op; class Inst <string n, string p, string t, Op o> { string Name = n; @@ -1330,4 +1332,7 @@ def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_ def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; + +def SCALAR_GET_LANE : IOpInst<"vget_lane", "sdi", "hQh", OP_SCALAR_GET_LN>; +def SCALAR_SET_LANE : IOpInst<"vset_lane", "dsdi", "hQh", OP_SCALAR_SET_LN>; } diff --git a/test/CodeGen/aarch64-neon-copy.c b/test/CodeGen/aarch64-neon-copy.c index 077f7ce913..eb91bf9b5a 100644 --- a/test/CodeGen/aarch64-neon-copy.c +++ b/test/CodeGen/aarch64-neon-copy.c @@ -1244,3 +1244,76 @@ float64x2_t test_vcopyq_laneq_f64(float64x2_t a, float64x2_t c) { return vcopyq_laneq_f64(a, 1, c, 1); } +// CHECK: test_vget_lane_f16 +int test_vget_lane_f16(float16x4_t v1) { + float16_t a = vget_lane_f16(v1, 3); + return (int)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vgetq_lane_f16 +int test_vgetq_lane_f16(float16x8_t v1) { + float16_t a = vgetq_lane_f16(v1, 7); + return (int)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + +// CHECK: test_vget_lane_f16_2 +float test_vget_lane_f16_2(float16x4_t v1) { + float16_t a = vget_lane_f16(v1, 3); + return (float)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] +} + +// CHECK: test_vgetq_lane_f16_2 +float test_vgetq_lane_f16_2(float16x8_t v1) { + float16_t a = vgetq_lane_f16(v1, 7); + return (float)a; +// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] +} + +// CHECK: test_vset_lane_f16 +float16x4_t 
test_vset_lane_f16(float16x4_t v1) { + float16_t a; + return vset_lane_f16(a, v1, 3); +// CHECK: fmov {{s[0-9]+}}, wzr +// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0] +} + +// CHECK: test_vsetq_lane_f16 +float16x8_t test_vsetq_lane_f16(float16x8_t v1) { + float16_t a; + return vsetq_lane_f16(a, v1, 7); +// CHECK: fmov {{s[0-9]+}}, wzr +// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0] +} + +// CHECK: test_vset_lane_f16_2 +float16x4_t test_vset_lane_f16_2(float16x4_t v1) { + float16_t a = vget_lane_f16(v1, 0); + return vset_lane_f16(a, v1, 3); +// CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0] +} + +// CHECK: test_vsetq_lane_f16_2 +float16x8_t test_vsetq_lane_f16_2(float16x8_t v1) { + float16_t a = vgetq_lane_f16(v1, 0); + return vsetq_lane_f16(a, v1, 7); +// CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0] +} + + +// CHECK: test_vsetq_lane_f16_3 +float16x8_t test_vsetq_lane_f16_3(float16x8_t v1, float b, float c) { + float16_t a = (float16_t)b; + return vsetq_lane_f16(a, v1, 7); +// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}} +} + +// CHECK: test_vsetq_lane_f16_4 +float16x8_t test_vsetq_lane_f16_4(float16x8_t v1, float b, float c) { + float16_t a = (float16_t)b + 1.0; + return vsetq_lane_f16(a, v1, 7); +// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}} +} + diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 420c5bc05b..b0939c9d00 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -154,7 +154,9 @@ enum OpKind { OpScalarQDMulHiLane, OpScalarQDMulHiLaneQ, OpScalarQRDMulHiLane, - OpScalarQRDMulHiLaneQ + OpScalarQRDMulHiLaneQ, + OpScalarGetLane, + OpScalarSetLane }; enum ClassKind { @@ -335,7 +337,8 @@ public: OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ; OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane; OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ; - + OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane; + OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane; Record *SI = R.getClass("SInst"); Record 
*II = R.getClass("IInst"); @@ -2209,6 +2212,34 @@ static std::string GenOpString(const std::string &name, OpKind op, "vgetq_lane_" + typeCode + "(__b, __c));"; break; } + case OpScalarGetLane:{ + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + if (quad) { + s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n"; + s += " vgetq_lane_s16(__a1, __b);"; + } else { + s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n"; + s += " vget_lane_s16(__a1, __b);"; + } + break; + } + case OpScalarSetLane:{ + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += "int16_t __a1 = (int16_t)__a;\\\n"; + if (quad) { + s += " int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n"; + s += " int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n"; + s += " vreinterpretq_f16_s16(__b2);"; + } else { + s += " int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n"; + s += " int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n"; + s += " vreinterpret_f16_s16(__b2);"; + } + break; + } + default: PrintFatalError("unknown OpKind!"); } |