From 7b837d8c75f78fe55c9b348b9ec2281169a48d2a Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Sat, 29 Mar 2014 10:18:08 +0000
Subject: ARM64: initial backend import

This adds a second implementation of the AArch64 architecture to LLVM,
accessible in parallel via the "arm64" triple. The plan over the coming
weeks & months is to merge the two into a single backend, during which
time thorough code review should naturally occur.

Everything will be easier with the target in-tree though, hence this
commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205090 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/Analysis/CostModel/ARM64/lit.local.cfg | 3 +
 test/Analysis/CostModel/ARM64/select.ll | 38 +
 test/Analysis/CostModel/ARM64/store.ll | 22 +
 test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll | 47 +
 test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll | 45 +
 .../ARM64/2011-03-21-Unaligned-Frame-Index.ll | 12 +
 test/CodeGen/ARM64/2011-04-21-CPSRBug.ll | 26 +
 test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll | 31 +
 .../CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll | 40 +
 .../ARM64/2012-05-07-DAGCombineVectorExtract.ll | 20 +
 test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll | 21 +
 test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll | 22 +
 test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll | 50 +
 test/CodeGen/ARM64/2012-06-06-FPToUI.ll | 65 +
 test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll | 56 +
 test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll | 19 +
 test/CodeGen/ARM64/2013-01-23-frem-crash.ll | 15 +
 test/CodeGen/ARM64/2013-01-23-sext-crash.ll | 37 +
 test/CodeGen/ARM64/2013-02-12-shufv8i8.ll | 11 +
 test/CodeGen/ARM64/AdvSIMD-Scalar.ll | 38 +
 test/CodeGen/ARM64/aapcs.ll | 86 +
 test/CodeGen/ARM64/abi-varargs.ll | 191 ++
 test/CodeGen/ARM64/abi.ll | 236 ++
 test/CodeGen/ARM64/abi_align.ll | 529 +++++
 test/CodeGen/ARM64/addp.ll | 32 +
 test/CodeGen/ARM64/addr-mode-folding.ll | 171 ++
 test/CodeGen/ARM64/addr-type-promotion.ll | 82 +
 test/CodeGen/ARM64/addrmode.ll | 72 +
 test/CodeGen/ARM64/alloc-no-stack-realign.ll | 21 +
 test/CodeGen/ARM64/alloca-frame-pointer-offset.ll | 29 +
 test/CodeGen/ARM64/andCmpBrToTBZ.ll | 72 +
 test/CodeGen/ARM64/anyregcc-crash.ll | 19 +
 test/CodeGen/ARM64/anyregcc.ll | 358 +++
 test/CodeGen/ARM64/arith-saturating.ll | 153 ++
 test/CodeGen/ARM64/arith.ll | 262 +++
 test/CodeGen/ARM64/atomic-128.ll | 213 ++
 test/CodeGen/ARM64/atomic.ll | 343 +++
 test/CodeGen/ARM64/big-imm-offsets.ll | 14 +
 test/CodeGen/ARM64/big-stack.ll | 21 +
 test/CodeGen/ARM64/bitfield-extract.ll | 406 ++++
 test/CodeGen/ARM64/blockaddress.ll | 30 +
 test/CodeGen/ARM64/build-vector.ll | 35 +
 test/CodeGen/ARM64/call-tailcalls.ll | 91 +
 test/CodeGen/ARM64/cast-opt.ll | 31 +
 test/CodeGen/ARM64/ccmp-heuristics.ll | 190 ++
 test/CodeGen/ARM64/ccmp.ll | 289 +++
 test/CodeGen/ARM64/coalesce-ext.ll | 17 +
 test/CodeGen/ARM64/code-model-large-abs.ll | 72 +
 test/CodeGen/ARM64/collect-loh-garbage-crash.ll | 37 +
 test/CodeGen/ARM64/collect-loh-str.ll | 23 +
 test/CodeGen/ARM64/collect-loh.ll | 47 +
 test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S | 17 +
 test/CodeGen/ARM64/complex-ret.ll | 7 +
 test/CodeGen/ARM64/convert-v2f64-v2i32.ll | 24 +
 test/CodeGen/ARM64/convert-v2i32-v2f64.ll | 29 +
 test/CodeGen/ARM64/copy-tuple.ll | 146 ++
 test/CodeGen/ARM64/crc32.ll | 71 +
 test/CodeGen/ARM64/crypto.ll | 135 ++
 test/CodeGen/ARM64/cse.ll | 59 +
 test/CodeGen/ARM64/csel.ll | 222 ++
 test/CodeGen/ARM64/cvt.ll | 401 ++++
 test/CodeGen/ARM64/dagcombiner-convergence.ll | 19 +
 test/CodeGen/ARM64/dagcombiner-load-slicing.ll | 102 +
 test/CodeGen/ARM64/dup.ll | 322 +++
test/CodeGen/ARM64/early-ifcvt.ll | 423 ++++ test/CodeGen/ARM64/elf-calls.ll | 20 + test/CodeGen/ARM64/elf-constpool.ll | 13 + test/CodeGen/ARM64/elf-globals.ll | 115 + test/CodeGen/ARM64/ext.ll | 101 + test/CodeGen/ARM64/extend-int-to-fp.ll | 19 + test/CodeGen/ARM64/extend.ll | 15 + test/CodeGen/ARM64/extload-knownzero.ll | 28 + test/CodeGen/ARM64/extract.ll | 58 + test/CodeGen/ARM64/extract_subvector.ll | 51 + test/CodeGen/ARM64/fast-isel-addr-offset.ll | 47 + test/CodeGen/ARM64/fast-isel-alloca.ll | 24 + test/CodeGen/ARM64/fast-isel-br.ll | 155 ++ test/CodeGen/ARM64/fast-isel-call.ll | 91 + test/CodeGen/ARM64/fast-isel-conversion.ll | 416 ++++ test/CodeGen/ARM64/fast-isel-fcmp.ll | 146 ++ test/CodeGen/ARM64/fast-isel-gv.ll | 38 + test/CodeGen/ARM64/fast-isel-icmp.ll | 214 ++ test/CodeGen/ARM64/fast-isel-indirectbr.ll | 36 + test/CodeGen/ARM64/fast-isel-intrinsic.ll | 135 ++ test/CodeGen/ARM64/fast-isel-materialize.ll | 27 + test/CodeGen/ARM64/fast-isel-noconvert.ll | 36 + test/CodeGen/ARM64/fast-isel-rem.ll | 33 + test/CodeGen/ARM64/fast-isel-ret.ll | 63 + test/CodeGen/ARM64/fast-isel-select.ll | 63 + test/CodeGen/ARM64/fast-isel.ll | 95 + test/CodeGen/ARM64/fastcc-tailcall.ll | 24 + .../ARM64/fastisel-gep-promote-before-add.ll | 18 + test/CodeGen/ARM64/fcmp-opt.ll | 173 ++ test/CodeGen/ARM64/fcopysign.ll | 51 + .../ARM64/fixed-point-scalar-cvt-dagcombine.ll | 15 + test/CodeGen/ARM64/fmadd.ll | 74 + test/CodeGen/ARM64/fmax.ll | 21 + test/CodeGen/ARM64/fmuladd.ll | 88 + test/CodeGen/ARM64/fold-address.ll | 79 + test/CodeGen/ARM64/fold-lsl.ll | 79 + test/CodeGen/ARM64/fp-imm.ll | 21 + test/CodeGen/ARM64/fp.ll | 8 + test/CodeGen/ARM64/fp128-folding.ll | 17 + test/CodeGen/ARM64/fp128.ll | 274 +++ test/CodeGen/ARM64/frame-index.ll | 11 + test/CodeGen/ARM64/frameaddr.ll | 15 + test/CodeGen/ARM64/global-address.ll | 14 + test/CodeGen/ARM64/hello.ll | 38 + test/CodeGen/ARM64/i16-subreg-extract.ll | 12 + test/CodeGen/ARM64/icmp-opt.ll | 17 + test/CodeGen/ARM64/illegal-float-ops.ll | 247 ++ test/CodeGen/ARM64/indexed-memory.ll | 351 +++ test/CodeGen/ARM64/inline-asm-error-I.ll | 11 + test/CodeGen/ARM64/inline-asm-error-J.ll | 11 + test/CodeGen/ARM64/inline-asm-error-K.ll | 11 + test/CodeGen/ARM64/inline-asm-error-L.ll | 11 + test/CodeGen/ARM64/inline-asm-error-M.ll | 11 + test/CodeGen/ARM64/inline-asm-error-N.ll | 11 + test/CodeGen/ARM64/inline-asm-zero-reg-error.ll | 11 + test/CodeGen/ARM64/inline-asm.ll | 230 ++ test/CodeGen/ARM64/join-reserved.ll | 17 + test/CodeGen/ARM64/jumptable.ll | 35 + test/CodeGen/ARM64/ld1.ll | 1254 ++++++++++ test/CodeGen/ARM64/ldp.ll | 149 ++ test/CodeGen/ARM64/ldur.ll | 67 + test/CodeGen/ARM64/ldxr-stxr.ll | 143 ++ test/CodeGen/ARM64/leaf-compact-unwind.ll | 161 ++ test/CodeGen/ARM64/leaf.ll | 13 + test/CodeGen/ARM64/lit.local.cfg | 6 + test/CodeGen/ARM64/long-shift.ll | 59 + test/CodeGen/ARM64/memcpy-inline.ll | 112 + test/CodeGen/ARM64/memset-inline.ll | 27 + test/CodeGen/ARM64/memset-to-bzero.ll | 101 + test/CodeGen/ARM64/movi.ll | 202 ++ test/CodeGen/ARM64/mul.ll | 90 + test/CodeGen/ARM64/neon-compare-instructions.ll | 1191 ++++++++++ test/CodeGen/ARM64/patchpoint.ll | 163 ++ test/CodeGen/ARM64/platform-reg.ll | 26 + test/CodeGen/ARM64/popcnt.ll | 43 + test/CodeGen/ARM64/prefetch.ll | 88 + test/CodeGen/ARM64/promote-const.ll | 255 +++ test/CodeGen/ARM64/redzone.ll | 18 + test/CodeGen/ARM64/register-offset-addressing.ll | 12 + test/CodeGen/ARM64/register-pairing.ll | 53 + test/CodeGen/ARM64/regress-f128csel-flags.ll | 27 + test/CodeGen/ARM64/return-vector.ll | 11 
+ test/CodeGen/ARM64/returnaddr.ll | 26 + test/CodeGen/ARM64/rev.ll | 221 ++ test/CodeGen/ARM64/rounding.ll | 208 ++ test/CodeGen/ARM64/scaled_iv.ll | 38 + test/CodeGen/ARM64/scvt.ll | 830 +++++++ test/CodeGen/ARM64/shifted-sext.ll | 277 +++ test/CodeGen/ARM64/simd-scalar-to-vector.ll | 17 + test/CodeGen/ARM64/simplest-elf.ll | 18 + test/CodeGen/ARM64/sincos.ll | 31 + test/CodeGen/ARM64/sitofp-combine-chains.ll | 22 + test/CodeGen/ARM64/sli-sri-opt.ll | 41 + test/CodeGen/ARM64/smaxv.ll | 74 + test/CodeGen/ARM64/sminv.ll | 74 + test/CodeGen/ARM64/spill-lr.ll | 74 + test/CodeGen/ARM64/spill.ll | 15 + test/CodeGen/ARM64/st1.ll | 628 +++++ test/CodeGen/ARM64/stack-no-frame.ll | 20 + test/CodeGen/ARM64/stackmap.ll | 281 +++ test/CodeGen/ARM64/stacksave.ll | 20 + test/CodeGen/ARM64/stp.ll | 101 + test/CodeGen/ARM64/strict-align.ll | 25 + test/CodeGen/ARM64/stur.ll | 98 + test/CodeGen/ARM64/subvector-extend.ll | 141 ++ test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll | 36 + test/CodeGen/ARM64/tbl.ll | 132 ++ test/CodeGen/ARM64/this-return.ll | 83 + test/CodeGen/ARM64/tls-darwin.ll | 18 + test/CodeGen/ARM64/tls-dynamic-together.ll | 18 + test/CodeGen/ARM64/tls-dynamics.ll | 135 ++ test/CodeGen/ARM64/tls-execs.ll | 63 + test/CodeGen/ARM64/trap.ll | 8 + test/CodeGen/ARM64/trn.ll | 134 ++ test/CodeGen/ARM64/trunc-store.ll | 75 + test/CodeGen/ARM64/umaxv.ll | 92 + test/CodeGen/ARM64/uminv.ll | 92 + test/CodeGen/ARM64/umov.ll | 33 + test/CodeGen/ARM64/unaligned_ldst.ll | 41 + test/CodeGen/ARM64/uzp.ll | 107 + test/CodeGen/ARM64/vaargs.ll | 20 + test/CodeGen/ARM64/vabs.ll | 796 +++++++ test/CodeGen/ARM64/vadd.ll | 941 ++++++++ test/CodeGen/ARM64/vaddlv.ll | 26 + test/CodeGen/ARM64/vaddv.ll | 233 ++ test/CodeGen/ARM64/variadic-aapcs.ll | 143 ++ test/CodeGen/ARM64/vbitwise.ll | 91 + test/CodeGen/ARM64/vclz.ll | 109 + test/CodeGen/ARM64/vcmp.ll | 227 ++ test/CodeGen/ARM64/vcnt.ll | 56 + test/CodeGen/ARM64/vcombine.ll | 17 + test/CodeGen/ARM64/vcvt.ll | 686 ++++++ test/CodeGen/ARM64/vcvt_f.ll | 82 + test/CodeGen/ARM64/vcvt_f32_su32.ll | 73 + test/CodeGen/ARM64/vcvt_n.ll | 49 + test/CodeGen/ARM64/vcvt_su32_f32.ll | 34 + test/CodeGen/ARM64/vcvtxd_f32_f64.ll | 11 + test/CodeGen/ARM64/vecCmpBr.ll | 207 ++ test/CodeGen/ARM64/vecFold.ll | 145 ++ test/CodeGen/ARM64/vector-ext.ll | 16 + test/CodeGen/ARM64/vector-imm.ll | 134 ++ test/CodeGen/ARM64/vector-ldst.ll | 601 +++++ test/CodeGen/ARM64/vext.ll | 464 ++++ test/CodeGen/ARM64/vfloatintrinsics.ll | 375 +++ test/CodeGen/ARM64/vhadd.ll | 249 ++ test/CodeGen/ARM64/vhsub.ll | 125 + test/CodeGen/ARM64/virtual_base.ll | 51 + test/CodeGen/ARM64/vmax.ll | 679 ++++++ test/CodeGen/ARM64/vminmaxnm.ll | 68 + test/CodeGen/ARM64/vmovn.ll | 242 ++ test/CodeGen/ARM64/vmul.ll | 1969 ++++++++++++++++ test/CodeGen/ARM64/volatile.ll | 27 + test/CodeGen/ARM64/vqadd.ll | 300 +++ test/CodeGen/ARM64/vqsub.ll | 147 ++ test/CodeGen/ARM64/vselect.ll | 18 + test/CodeGen/ARM64/vsetcc_fp.ll | 11 + test/CodeGen/ARM64/vshift.ll | 1909 ++++++++++++++++ test/CodeGen/ARM64/vshr.ll | 49 + test/CodeGen/ARM64/vshuffle.ll | 115 + test/CodeGen/ARM64/vsqrt.ll | 177 ++ test/CodeGen/ARM64/vsra.ll | 142 ++ test/CodeGen/ARM64/vsub.ll | 417 ++++ test/CodeGen/ARM64/weak-reference.ll | 10 + test/CodeGen/ARM64/xaluo.ll | 524 +++++ test/CodeGen/ARM64/zero-cycle-regmov.ll | 17 + test/CodeGen/ARM64/zero-cycle-zeroing.ll | 49 + test/CodeGen/ARM64/zext.ll | 11 + test/CodeGen/ARM64/zextload-unscaled.ll | 40 + test/CodeGen/ARM64/zip.ll | 107 + test/DebugInfo/ARM64/lit.local.cfg | 4 + 
test/DebugInfo/ARM64/struct_by_value.ll | 68 + test/MC/ARM64/advsimd.s | 1997 ++++++++++++++++ test/MC/ARM64/aliases.s | 733 ++++++ test/MC/ARM64/arithmetic-encoding.s | 631 +++++ test/MC/ARM64/arm64-fixup.s | 10 + test/MC/ARM64/basic-a64-instructions.s | 18 + test/MC/ARM64/bitfield-encoding.s | 30 + test/MC/ARM64/branch-encoding.s | 159 ++ test/MC/ARM64/crypto.s | 66 + test/MC/ARM64/diags.s | 242 ++ test/MC/ARM64/directive_loh.s | 93 + test/MC/ARM64/elf-relocs.s | 249 ++ test/MC/ARM64/fp-encoding.s | 507 +++++ test/MC/ARM64/large-relocs.s | 38 + test/MC/ARM64/lit.local.cfg | 6 + test/MC/ARM64/logical-encoding.s | 224 ++ test/MC/ARM64/mapping-across-sections.s | 28 + test/MC/ARM64/mapping-within-section.s | 23 + test/MC/ARM64/memory.s | 634 ++++++ test/MC/ARM64/separator.s | 20 + test/MC/ARM64/simd-ldst.s | 2404 ++++++++++++++++++++ test/MC/ARM64/small-data-fixups.s | 24 + test/MC/ARM64/system-encoding.s | 679 ++++++ test/MC/ARM64/tls-modifiers-darwin.s | 13 + test/MC/ARM64/tls-relocs.s | 320 +++ test/MC/ARM64/variable-exprs.s | 40 + test/MC/Disassembler/ARM64/advsimd.txt | 2282 +++++++++++++++++++ test/MC/Disassembler/ARM64/arithmetic.txt | 522 +++++ test/MC/Disassembler/ARM64/bitfield.txt | 29 + test/MC/Disassembler/ARM64/branch.txt | 75 + test/MC/Disassembler/ARM64/crc32.txt | 18 + test/MC/Disassembler/ARM64/crypto.txt | 47 + test/MC/Disassembler/ARM64/invalid-logical.txt | 6 + test/MC/Disassembler/ARM64/lit.local.cfg | 5 + test/MC/Disassembler/ARM64/logical.txt | 217 ++ test/MC/Disassembler/ARM64/memory.txt | 558 +++++ test/MC/Disassembler/ARM64/scalar-fp.txt | 255 +++ test/MC/Disassembler/ARM64/system.txt | 58 + .../MC/MachO/ARM64/darwin-ARM64-local-label-diff.s | 21 + test/MC/MachO/ARM64/darwin-ARM64-reloc.s | 157 ++ test/MC/MachO/ARM64/lit.local.cfg | 4 + test/Transforms/GlobalMerge/ARM/arm.ll | 85 + test/Transforms/GlobalMerge/ARM/lit.local.cfg | 4 + test/Transforms/GlobalMerge/ARM64/arm64.ll | 88 + test/Transforms/GlobalMerge/ARM64/lit.local.cfg | 4 + .../InstCombine/2012-04-23-Neon-Intrinsics.ll | 69 +- test/Transforms/InstCombine/sincospi.ll | 1 + .../LoopStrengthReduce/ARM64/lit.local.cfg | 5 + .../LoopStrengthReduce/ARM64/lsr-memcpy.ll | 33 + .../LoopStrengthReduce/ARM64/lsr-memset.ll | 101 + test/Transforms/LoopVectorize/ARM64/gather-cost.ll | 85 + test/Transforms/LoopVectorize/ARM64/lit.local.cfg | 6 + 286 files changed, 46411 insertions(+), 3 deletions(-) create mode 100644 test/Analysis/CostModel/ARM64/lit.local.cfg create mode 100644 test/Analysis/CostModel/ARM64/select.ll create mode 100644 test/Analysis/CostModel/ARM64/store.ll create mode 100644 test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll create mode 100644 test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll create mode 100644 test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll create mode 100644 test/CodeGen/ARM64/2011-04-21-CPSRBug.ll create mode 100644 test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll create mode 100644 test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll create mode 100644 test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll create mode 100644 test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll create mode 100644 test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll create mode 100644 test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll create mode 100644 test/CodeGen/ARM64/2012-06-06-FPToUI.ll create mode 100644 test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll create mode 100644 test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll create mode 100644 test/CodeGen/ARM64/2013-01-23-frem-crash.ll create mode 100644 
test/CodeGen/ARM64/2013-01-23-sext-crash.ll create mode 100644 test/CodeGen/ARM64/2013-02-12-shufv8i8.ll create mode 100644 test/CodeGen/ARM64/AdvSIMD-Scalar.ll create mode 100644 test/CodeGen/ARM64/aapcs.ll create mode 100644 test/CodeGen/ARM64/abi-varargs.ll create mode 100644 test/CodeGen/ARM64/abi.ll create mode 100644 test/CodeGen/ARM64/abi_align.ll create mode 100644 test/CodeGen/ARM64/addp.ll create mode 100644 test/CodeGen/ARM64/addr-mode-folding.ll create mode 100644 test/CodeGen/ARM64/addr-type-promotion.ll create mode 100644 test/CodeGen/ARM64/addrmode.ll create mode 100644 test/CodeGen/ARM64/alloc-no-stack-realign.ll create mode 100644 test/CodeGen/ARM64/alloca-frame-pointer-offset.ll create mode 100644 test/CodeGen/ARM64/andCmpBrToTBZ.ll create mode 100644 test/CodeGen/ARM64/anyregcc-crash.ll create mode 100644 test/CodeGen/ARM64/anyregcc.ll create mode 100644 test/CodeGen/ARM64/arith-saturating.ll create mode 100644 test/CodeGen/ARM64/arith.ll create mode 100644 test/CodeGen/ARM64/atomic-128.ll create mode 100644 test/CodeGen/ARM64/atomic.ll create mode 100644 test/CodeGen/ARM64/big-imm-offsets.ll create mode 100644 test/CodeGen/ARM64/big-stack.ll create mode 100644 test/CodeGen/ARM64/bitfield-extract.ll create mode 100644 test/CodeGen/ARM64/blockaddress.ll create mode 100644 test/CodeGen/ARM64/build-vector.ll create mode 100644 test/CodeGen/ARM64/call-tailcalls.ll create mode 100644 test/CodeGen/ARM64/cast-opt.ll create mode 100644 test/CodeGen/ARM64/ccmp-heuristics.ll create mode 100644 test/CodeGen/ARM64/ccmp.ll create mode 100644 test/CodeGen/ARM64/coalesce-ext.ll create mode 100644 test/CodeGen/ARM64/code-model-large-abs.ll create mode 100644 test/CodeGen/ARM64/collect-loh-garbage-crash.ll create mode 100644 test/CodeGen/ARM64/collect-loh-str.ll create mode 100644 test/CodeGen/ARM64/collect-loh.ll create mode 100644 test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S create mode 100644 test/CodeGen/ARM64/complex-ret.ll create mode 100644 test/CodeGen/ARM64/convert-v2f64-v2i32.ll create mode 100644 test/CodeGen/ARM64/convert-v2i32-v2f64.ll create mode 100644 test/CodeGen/ARM64/copy-tuple.ll create mode 100644 test/CodeGen/ARM64/crc32.ll create mode 100644 test/CodeGen/ARM64/crypto.ll create mode 100644 test/CodeGen/ARM64/cse.ll create mode 100644 test/CodeGen/ARM64/csel.ll create mode 100644 test/CodeGen/ARM64/cvt.ll create mode 100644 test/CodeGen/ARM64/dagcombiner-convergence.ll create mode 100644 test/CodeGen/ARM64/dagcombiner-load-slicing.ll create mode 100644 test/CodeGen/ARM64/dup.ll create mode 100644 test/CodeGen/ARM64/early-ifcvt.ll create mode 100644 test/CodeGen/ARM64/elf-calls.ll create mode 100644 test/CodeGen/ARM64/elf-constpool.ll create mode 100644 test/CodeGen/ARM64/elf-globals.ll create mode 100644 test/CodeGen/ARM64/ext.ll create mode 100644 test/CodeGen/ARM64/extend-int-to-fp.ll create mode 100644 test/CodeGen/ARM64/extend.ll create mode 100644 test/CodeGen/ARM64/extload-knownzero.ll create mode 100644 test/CodeGen/ARM64/extract.ll create mode 100644 test/CodeGen/ARM64/extract_subvector.ll create mode 100644 test/CodeGen/ARM64/fast-isel-addr-offset.ll create mode 100644 test/CodeGen/ARM64/fast-isel-alloca.ll create mode 100644 test/CodeGen/ARM64/fast-isel-br.ll create mode 100644 test/CodeGen/ARM64/fast-isel-call.ll create mode 100644 test/CodeGen/ARM64/fast-isel-conversion.ll create mode 100644 test/CodeGen/ARM64/fast-isel-fcmp.ll create mode 100644 test/CodeGen/ARM64/fast-isel-gv.ll create mode 100644 test/CodeGen/ARM64/fast-isel-icmp.ll create mode 
100644 test/CodeGen/ARM64/fast-isel-indirectbr.ll create mode 100644 test/CodeGen/ARM64/fast-isel-intrinsic.ll create mode 100644 test/CodeGen/ARM64/fast-isel-materialize.ll create mode 100644 test/CodeGen/ARM64/fast-isel-noconvert.ll create mode 100644 test/CodeGen/ARM64/fast-isel-rem.ll create mode 100644 test/CodeGen/ARM64/fast-isel-ret.ll create mode 100644 test/CodeGen/ARM64/fast-isel-select.ll create mode 100644 test/CodeGen/ARM64/fast-isel.ll create mode 100644 test/CodeGen/ARM64/fastcc-tailcall.ll create mode 100644 test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll create mode 100644 test/CodeGen/ARM64/fcmp-opt.ll create mode 100644 test/CodeGen/ARM64/fcopysign.ll create mode 100644 test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll create mode 100644 test/CodeGen/ARM64/fmadd.ll create mode 100644 test/CodeGen/ARM64/fmax.ll create mode 100644 test/CodeGen/ARM64/fmuladd.ll create mode 100644 test/CodeGen/ARM64/fold-address.ll create mode 100644 test/CodeGen/ARM64/fold-lsl.ll create mode 100644 test/CodeGen/ARM64/fp-imm.ll create mode 100644 test/CodeGen/ARM64/fp.ll create mode 100644 test/CodeGen/ARM64/fp128-folding.ll create mode 100644 test/CodeGen/ARM64/fp128.ll create mode 100644 test/CodeGen/ARM64/frame-index.ll create mode 100644 test/CodeGen/ARM64/frameaddr.ll create mode 100644 test/CodeGen/ARM64/global-address.ll create mode 100644 test/CodeGen/ARM64/hello.ll create mode 100644 test/CodeGen/ARM64/i16-subreg-extract.ll create mode 100644 test/CodeGen/ARM64/icmp-opt.ll create mode 100644 test/CodeGen/ARM64/illegal-float-ops.ll create mode 100644 test/CodeGen/ARM64/indexed-memory.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-I.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-J.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-K.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-L.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-M.ll create mode 100644 test/CodeGen/ARM64/inline-asm-error-N.ll create mode 100644 test/CodeGen/ARM64/inline-asm-zero-reg-error.ll create mode 100644 test/CodeGen/ARM64/inline-asm.ll create mode 100644 test/CodeGen/ARM64/join-reserved.ll create mode 100644 test/CodeGen/ARM64/jumptable.ll create mode 100644 test/CodeGen/ARM64/ld1.ll create mode 100644 test/CodeGen/ARM64/ldp.ll create mode 100644 test/CodeGen/ARM64/ldur.ll create mode 100644 test/CodeGen/ARM64/ldxr-stxr.ll create mode 100644 test/CodeGen/ARM64/leaf-compact-unwind.ll create mode 100644 test/CodeGen/ARM64/leaf.ll create mode 100644 test/CodeGen/ARM64/lit.local.cfg create mode 100644 test/CodeGen/ARM64/long-shift.ll create mode 100644 test/CodeGen/ARM64/memcpy-inline.ll create mode 100644 test/CodeGen/ARM64/memset-inline.ll create mode 100644 test/CodeGen/ARM64/memset-to-bzero.ll create mode 100644 test/CodeGen/ARM64/movi.ll create mode 100644 test/CodeGen/ARM64/mul.ll create mode 100644 test/CodeGen/ARM64/neon-compare-instructions.ll create mode 100644 test/CodeGen/ARM64/patchpoint.ll create mode 100644 test/CodeGen/ARM64/platform-reg.ll create mode 100644 test/CodeGen/ARM64/popcnt.ll create mode 100644 test/CodeGen/ARM64/prefetch.ll create mode 100644 test/CodeGen/ARM64/promote-const.ll create mode 100644 test/CodeGen/ARM64/redzone.ll create mode 100644 test/CodeGen/ARM64/register-offset-addressing.ll create mode 100644 test/CodeGen/ARM64/register-pairing.ll create mode 100644 test/CodeGen/ARM64/regress-f128csel-flags.ll create mode 100644 test/CodeGen/ARM64/return-vector.ll create mode 100644 test/CodeGen/ARM64/returnaddr.ll create 
mode 100644 test/CodeGen/ARM64/rev.ll create mode 100644 test/CodeGen/ARM64/rounding.ll create mode 100644 test/CodeGen/ARM64/scaled_iv.ll create mode 100644 test/CodeGen/ARM64/scvt.ll create mode 100644 test/CodeGen/ARM64/shifted-sext.ll create mode 100644 test/CodeGen/ARM64/simd-scalar-to-vector.ll create mode 100644 test/CodeGen/ARM64/simplest-elf.ll create mode 100644 test/CodeGen/ARM64/sincos.ll create mode 100644 test/CodeGen/ARM64/sitofp-combine-chains.ll create mode 100644 test/CodeGen/ARM64/sli-sri-opt.ll create mode 100644 test/CodeGen/ARM64/smaxv.ll create mode 100644 test/CodeGen/ARM64/sminv.ll create mode 100644 test/CodeGen/ARM64/spill-lr.ll create mode 100644 test/CodeGen/ARM64/spill.ll create mode 100644 test/CodeGen/ARM64/st1.ll create mode 100644 test/CodeGen/ARM64/stack-no-frame.ll create mode 100644 test/CodeGen/ARM64/stackmap.ll create mode 100644 test/CodeGen/ARM64/stacksave.ll create mode 100644 test/CodeGen/ARM64/stp.ll create mode 100644 test/CodeGen/ARM64/strict-align.ll create mode 100644 test/CodeGen/ARM64/stur.ll create mode 100644 test/CodeGen/ARM64/subvector-extend.ll create mode 100644 test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll create mode 100644 test/CodeGen/ARM64/tbl.ll create mode 100644 test/CodeGen/ARM64/this-return.ll create mode 100644 test/CodeGen/ARM64/tls-darwin.ll create mode 100644 test/CodeGen/ARM64/tls-dynamic-together.ll create mode 100644 test/CodeGen/ARM64/tls-dynamics.ll create mode 100644 test/CodeGen/ARM64/tls-execs.ll create mode 100644 test/CodeGen/ARM64/trap.ll create mode 100644 test/CodeGen/ARM64/trn.ll create mode 100644 test/CodeGen/ARM64/trunc-store.ll create mode 100644 test/CodeGen/ARM64/umaxv.ll create mode 100644 test/CodeGen/ARM64/uminv.ll create mode 100644 test/CodeGen/ARM64/umov.ll create mode 100644 test/CodeGen/ARM64/unaligned_ldst.ll create mode 100644 test/CodeGen/ARM64/uzp.ll create mode 100644 test/CodeGen/ARM64/vaargs.ll create mode 100644 test/CodeGen/ARM64/vabs.ll create mode 100644 test/CodeGen/ARM64/vadd.ll create mode 100644 test/CodeGen/ARM64/vaddlv.ll create mode 100644 test/CodeGen/ARM64/vaddv.ll create mode 100644 test/CodeGen/ARM64/variadic-aapcs.ll create mode 100644 test/CodeGen/ARM64/vbitwise.ll create mode 100644 test/CodeGen/ARM64/vclz.ll create mode 100644 test/CodeGen/ARM64/vcmp.ll create mode 100644 test/CodeGen/ARM64/vcnt.ll create mode 100644 test/CodeGen/ARM64/vcombine.ll create mode 100644 test/CodeGen/ARM64/vcvt.ll create mode 100644 test/CodeGen/ARM64/vcvt_f.ll create mode 100644 test/CodeGen/ARM64/vcvt_f32_su32.ll create mode 100644 test/CodeGen/ARM64/vcvt_n.ll create mode 100644 test/CodeGen/ARM64/vcvt_su32_f32.ll create mode 100644 test/CodeGen/ARM64/vcvtxd_f32_f64.ll create mode 100644 test/CodeGen/ARM64/vecCmpBr.ll create mode 100644 test/CodeGen/ARM64/vecFold.ll create mode 100644 test/CodeGen/ARM64/vector-ext.ll create mode 100644 test/CodeGen/ARM64/vector-imm.ll create mode 100644 test/CodeGen/ARM64/vector-ldst.ll create mode 100644 test/CodeGen/ARM64/vext.ll create mode 100644 test/CodeGen/ARM64/vfloatintrinsics.ll create mode 100644 test/CodeGen/ARM64/vhadd.ll create mode 100644 test/CodeGen/ARM64/vhsub.ll create mode 100644 test/CodeGen/ARM64/virtual_base.ll create mode 100644 test/CodeGen/ARM64/vmax.ll create mode 100644 test/CodeGen/ARM64/vminmaxnm.ll create mode 100644 test/CodeGen/ARM64/vmovn.ll create mode 100644 test/CodeGen/ARM64/vmul.ll create mode 100644 test/CodeGen/ARM64/volatile.ll create mode 100644 test/CodeGen/ARM64/vqadd.ll create mode 100644 
test/CodeGen/ARM64/vqsub.ll create mode 100644 test/CodeGen/ARM64/vselect.ll create mode 100644 test/CodeGen/ARM64/vsetcc_fp.ll create mode 100644 test/CodeGen/ARM64/vshift.ll create mode 100644 test/CodeGen/ARM64/vshr.ll create mode 100644 test/CodeGen/ARM64/vshuffle.ll create mode 100644 test/CodeGen/ARM64/vsqrt.ll create mode 100644 test/CodeGen/ARM64/vsra.ll create mode 100644 test/CodeGen/ARM64/vsub.ll create mode 100644 test/CodeGen/ARM64/weak-reference.ll create mode 100644 test/CodeGen/ARM64/xaluo.ll create mode 100644 test/CodeGen/ARM64/zero-cycle-regmov.ll create mode 100644 test/CodeGen/ARM64/zero-cycle-zeroing.ll create mode 100644 test/CodeGen/ARM64/zext.ll create mode 100644 test/CodeGen/ARM64/zextload-unscaled.ll create mode 100644 test/CodeGen/ARM64/zip.ll create mode 100644 test/DebugInfo/ARM64/lit.local.cfg create mode 100644 test/DebugInfo/ARM64/struct_by_value.ll create mode 100644 test/MC/ARM64/advsimd.s create mode 100644 test/MC/ARM64/aliases.s create mode 100644 test/MC/ARM64/arithmetic-encoding.s create mode 100644 test/MC/ARM64/arm64-fixup.s create mode 100644 test/MC/ARM64/basic-a64-instructions.s create mode 100644 test/MC/ARM64/bitfield-encoding.s create mode 100644 test/MC/ARM64/branch-encoding.s create mode 100644 test/MC/ARM64/crypto.s create mode 100644 test/MC/ARM64/diags.s create mode 100644 test/MC/ARM64/directive_loh.s create mode 100644 test/MC/ARM64/elf-relocs.s create mode 100644 test/MC/ARM64/fp-encoding.s create mode 100644 test/MC/ARM64/large-relocs.s create mode 100644 test/MC/ARM64/lit.local.cfg create mode 100644 test/MC/ARM64/logical-encoding.s create mode 100644 test/MC/ARM64/mapping-across-sections.s create mode 100644 test/MC/ARM64/mapping-within-section.s create mode 100644 test/MC/ARM64/memory.s create mode 100644 test/MC/ARM64/separator.s create mode 100644 test/MC/ARM64/simd-ldst.s create mode 100644 test/MC/ARM64/small-data-fixups.s create mode 100644 test/MC/ARM64/system-encoding.s create mode 100644 test/MC/ARM64/tls-modifiers-darwin.s create mode 100644 test/MC/ARM64/tls-relocs.s create mode 100644 test/MC/ARM64/variable-exprs.s create mode 100644 test/MC/Disassembler/ARM64/advsimd.txt create mode 100644 test/MC/Disassembler/ARM64/arithmetic.txt create mode 100644 test/MC/Disassembler/ARM64/bitfield.txt create mode 100644 test/MC/Disassembler/ARM64/branch.txt create mode 100644 test/MC/Disassembler/ARM64/crc32.txt create mode 100644 test/MC/Disassembler/ARM64/crypto.txt create mode 100644 test/MC/Disassembler/ARM64/invalid-logical.txt create mode 100644 test/MC/Disassembler/ARM64/lit.local.cfg create mode 100644 test/MC/Disassembler/ARM64/logical.txt create mode 100644 test/MC/Disassembler/ARM64/memory.txt create mode 100644 test/MC/Disassembler/ARM64/scalar-fp.txt create mode 100644 test/MC/Disassembler/ARM64/system.txt create mode 100644 test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s create mode 100644 test/MC/MachO/ARM64/darwin-ARM64-reloc.s create mode 100644 test/MC/MachO/ARM64/lit.local.cfg create mode 100644 test/Transforms/GlobalMerge/ARM/arm.ll create mode 100644 test/Transforms/GlobalMerge/ARM/lit.local.cfg create mode 100644 test/Transforms/GlobalMerge/ARM64/arm64.ll create mode 100644 test/Transforms/GlobalMerge/ARM64/lit.local.cfg create mode 100644 test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg create mode 100644 test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll create mode 100644 test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll create mode 100644 
test/Transforms/LoopVectorize/ARM64/gather-cost.ll create mode 100644 test/Transforms/LoopVectorize/ARM64/lit.local.cfg (limited to 'test') diff --git a/test/Analysis/CostModel/ARM64/lit.local.cfg b/test/Analysis/CostModel/ARM64/lit.local.cfg new file mode 100644 index 0000000000..84ac9811f0 --- /dev/null +++ b/test/Analysis/CostModel/ARM64/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'ARM64' in targets: + config.unsupported = True diff --git a/test/Analysis/CostModel/ARM64/select.ll b/test/Analysis/CostModel/ARM64/select.ll new file mode 100644 index 0000000000..216dc5ddc4 --- /dev/null +++ b/test/Analysis/CostModel/ARM64/select.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +; CHECK-LABEL: select +define void @select() { + ; Scalar values + ; CHECK: cost of 1 {{.*}} select + %v1 = select i1 undef, i8 undef, i8 undef + ; CHECK: cost of 1 {{.*}} select + %v2 = select i1 undef, i16 undef, i16 undef + ; CHECK: cost of 1 {{.*}} select + %v3 = select i1 undef, i32 undef, i32 undef + ; CHECK: cost of 1 {{.*}} select + %v4 = select i1 undef, i64 undef, i64 undef + ; CHECK: cost of 1 {{.*}} select + %v5 = select i1 undef, float undef, float undef + ; CHECK: cost of 1 {{.*}} select + %v6 = select i1 undef, double undef, double undef + + ; Vector values - check for vectors that have a high cost because they end up + ; scalarized. + ; CHECK: cost of 320 {{.*}} select + %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef + + ; CHECK: cost of 160 {{.*}} select + %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef + ; CHECK: cost of 320 {{.*}} select + %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef + + ; CHECK: cost of 80 {{.*}} select + %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef + ; CHECK: cost of 160 {{.*}} select + %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef + ; CHECK: cost of 320 {{.*}} select + %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef + + ret void +} diff --git a/test/Analysis/CostModel/ARM64/store.ll b/test/Analysis/CostModel/ARM64/store.ll new file mode 100644 index 0000000000..0c9883cf2a --- /dev/null +++ b/test/Analysis/CostModel/ARM64/store.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +; CHECK-LABEL: store +define void @store() { + ; Stores of <2 x i64> should be expensive because we don't split them and + ; and unaligned 16b stores have bad performance. + ; CHECK: cost of 12 {{.*}} store + store <2 x i64> undef, <2 x i64> * undef + + ; We scalarize the loads/stores because there is no vector register name for + ; these types (they get extended to v.4h/v.2s). 
+ ; CHECK: cost of 16 {{.*}} store + store <2 x i8> undef, <2 x i8> * undef + ; CHECK: cost of 64 {{.*}} store + store <4 x i8> undef, <4 x i8> * undef + ; CHECK: cost of 16 {{.*}} load + load <2 x i8> * undef + ; CHECK: cost of 64 {{.*}} load + load <4 x i8> * undef + + ret void +} diff --git a/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll b/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll new file mode 100644 index 0000000000..6fb7c3fb5e --- /dev/null +++ b/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin + +; Can't copy or spill / restore CPSR. +; rdar://9105206 + +define fastcc void @t() ssp align 2 { +entry: + br i1 undef, label %bb3.i, label %bb2.i + +bb2.i: ; preds = %entry + br label %bb3.i + +bb3.i: ; preds = %bb2.i, %entry + br i1 undef, label %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71, label %bb.i69 + +bb.i69: ; preds = %bb3.i + br label %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71 + +_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71: ; preds = %bb.i69, %bb3.i + %0 = select i1 undef, float 0.000000e+00, float undef + %1 = fdiv float %0, undef + %2 = fcmp ult float %1, 0xBF847AE140000000 + %storemerge9 = select i1 %2, float %1, float 0.000000e+00 + store float %storemerge9, float* undef, align 4 + br i1 undef, label %bb42, label %bb47 + +bb42: ; preds = %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71 + br i1 undef, label %bb46, label %bb53 + +bb46: ; preds = %bb42 + br label %bb48 + +bb47: ; preds = %_ZN12gjkepa2_impl3EPA6appendERNS0_5sListEPNS0_5sFaceE.exit71 + br label %bb48 + +bb48: ; preds = %bb47, %bb46 + br i1 undef, label %bb1.i14, label %bb.i13 + +bb.i13: ; preds = %bb48 + br label %bb1.i14 + +bb1.i14: ; preds = %bb.i13, %bb48 + br label %bb53 + +bb53: ; preds = %bb1.i14, %bb42 + ret void +} diff --git a/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll new file mode 100644 index 0000000000..2b083d8049 --- /dev/null +++ b/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin + +; rdar://9146594 + +define void @drt_vsprintf() nounwind ssp { +entry: + %do_tab_convert = alloca i32, align 4 + br i1 undef, label %if.then24, label %if.else295, !dbg !13 + +if.then24: ; preds = %entry + unreachable + +if.else295: ; preds = %entry + call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert}, metadata !16), !dbg !18 + store i32 0, i32* %do_tab_convert, align 4, !dbg !19 + unreachable +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +!llvm.dbg.gv = !{!0} +!llvm.dbg.sp = !{!1, !7, !10, !11, !12} + +!0 = metadata !{i32 589876, i32 0, metadata !1, metadata !"vsplive", metadata !"vsplive", metadata !"", metadata !2, i32 617, metadata !6, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] +!1 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"drt_vsprintf", metadata !"drt_vsprintf", metadata !"", i32 616, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 589845, metadata !20, 
metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{metadata !6} +!6 = metadata !{i32 589860, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"putc_mem", metadata !"putc_mem", metadata !"", i32 30, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!9 = metadata !{null} +!10 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_double", metadata !"print_double", metadata !"", i32 203, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_number", metadata !"print_number", metadata !"", i32 75, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!12 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"get_flags", metadata !"get_flags", metadata !"", i32 508, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!13 = metadata !{i32 653, i32 5, metadata !14, null} +!14 = metadata !{i32 589835, metadata !20, metadata !15, i32 652, i32 35, i32 2} ; [ DW_TAG_lexical_block ] +!15 = metadata !{i32 589835, metadata !20, metadata !1, i32 616, i32 1, i32 0} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 590080, metadata !17, metadata !"do_tab_convert", metadata !2, i32 853, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] +!17 = metadata !{i32 589835, metadata !20, metadata !14, i32 850, i32 12, i32 33} ; [ DW_TAG_lexical_block ] +!18 = metadata !{i32 853, i32 11, metadata !17, null} +!19 = metadata !{i32 853, i32 29, metadata !17, null} +!20 = metadata !{metadata !"print.i", metadata !"/Volumes/Ebi/echeng/radars/r9146594"} +!21 = metadata !{i32 0} diff --git a/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll b/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll new file mode 100644 index 0000000000..6f0ec34fc1 --- /dev/null +++ b/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s +define void @foo(i64 %val) { +; CHECK: foo +; The stack frame store is not 64-bit aligned. Make sure we use an +; instruction that can handle that. +; CHECK: stur x0, [sp, #20] + %a = alloca [49 x i32], align 4 + %p32 = getelementptr inbounds [49 x i32]* %a, i64 0, i64 2 + %p = bitcast i32* %p32 to i64* + store i64 %val, i64* %p, align 8 + ret void +} diff --git a/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll b/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll new file mode 100644 index 0000000000..88232fcc0b --- /dev/null +++ b/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=arm64-apple-iOS5.0 + +; CPSR is not allocatable so fast allocatable wouldn't mark them killed. 
+; rdar://9313272 + +define hidden void @t() nounwind { +entry: + %cmp = icmp eq i32* null, undef + %frombool = zext i1 %cmp to i8 + store i8 %frombool, i8* undef, align 1 + %tmp4 = load i8* undef, align 1 + %tobool = trunc i8 %tmp4 to i1 + br i1 %tobool, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry + unreachable + +if.end: ; preds = %entry + br i1 undef, label %land.lhs.true14, label %if.end33 + +land.lhs.true14: ; preds = %if.end + unreachable + +if.end33: ; preds = %if.end + unreachable +} diff --git a/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll b/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll new file mode 100644 index 0000000000..ea1cd02ca2 --- /dev/null +++ b/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s + +; Can't fold the increment by 1<<12 into a post-increment load +; rdar://10301335 + +@test_data = common global i32 0, align 4 + +define void @t() nounwind ssp { +; CHECK-LABEL: t: +entry: + br label %for.body + +for.body: +; CHECK: for.body +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}] +; CHECK: add x[[REG:[0-9]+]], +; CHECK: x[[REG]], #4096 + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 12 + %add = add nsw i64 %0, 34628173824 + %1 = inttoptr i64 %add to i32* + %2 = load volatile i32* %1, align 4096 + store volatile i32 %2, i32* @test_data, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll b/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll new file mode 100644 index 0000000000..d47dbb2816 --- /dev/null +++ b/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -march=arm64 + +; The target lowering for integer comparisons was replacing some DAG nodes +; during operation legalization, which resulted in dangling pointers, +; cycles in DAGs, and eventually crashes. This is the testcase for +; one of those crashes. 
(rdar://10653656) + +define void @test(i1 zeroext %IsArrow) nounwind ssp align 2 { +entry: + br i1 undef, label %return, label %lor.lhs.false + +lor.lhs.false: + br i1 undef, label %return, label %if.end + +if.end: + %tmp.i = load i64* undef, align 8 + %and.i.i.i = and i64 %tmp.i, -16 + br i1 %IsArrow, label %if.else_crit_edge, label %if.end32 + +if.else_crit_edge: + br i1 undef, label %if.end32, label %return + +if.end32: + %0 = icmp ult i32 undef, 3 + %1 = zext i64 %tmp.i to i320 + %.pn.v = select i1 %0, i320 128, i320 64 + %.pn = shl i320 %1, %.pn.v + %ins346392 = or i320 %.pn, 0 + store i320 %ins346392, i320* undef, align 8 + br i1 undef, label %sw.bb.i.i, label %exit + +sw.bb.i.i: + unreachable + +exit: + unreachable + +return: + ret void +} diff --git a/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll b/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll new file mode 100644 index 0000000000..a4d37e4868 --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s + +define i32 @foo(<4 x i32> %a, i32 %n) nounwind { +; CHECK-LABEL: foo: +; CHECK: fmov w0, s0 +; CHECK-NEXT: ret + %b = bitcast <4 x i32> %a to i128 + %c = trunc i128 %b to i32 + ret i32 %c +} + +define i64 @bar(<2 x i64> %a, i64 %n) nounwind { +; CHECK-LABEL: bar: +; CHECK: fmov x0, d0 +; CHECK-NEXT: ret + %b = bitcast <2 x i64> %a to i128 + %c = trunc i128 %b to i64 + ret i64 %c +} + diff --git a/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll new file mode 100644 index 0000000000..d59b0d0043 --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march arm64 -mcpu=cyclone | FileCheck %s +; + +@b = private unnamed_addr constant [3 x i32] [i32 1768775988, i32 1685481784, i32 1836253201], align 4 + +; The important thing for this test is that we need an unaligned load of `l_b' +; ("ldr w2, [x1, #8]" in this case). + +; CHECK: adrp x[[PAGE:[0-9]+]], {{l_b@PAGE|.Lb}} +; CHECK: add x[[ADDR:[0-9]+]], x[[PAGE]], {{l_b@PAGEOFF|:lo12:.Lb}} +; CHECK-NEXT: ldr [[VAL:w[0-9]+]], [x[[ADDR]], #8] +; CHECK-NEXT: str [[VAL]], [x0, #8] +; CHECK-NEXT: ldr [[VAL2:x[0-9]+]], [x[[ADDR]]] +; CHECK-NEXT: str [[VAL2]], [x0] + +define void @foo(i8* %a) { + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind diff --git a/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll b/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll new file mode 100644 index 0000000000..d1840d3594 --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK-LINUX +; + +define hidden void @t() optsize ssp { +entry: + store i64 zext (i32 ptrtoint (i64 (i32)* @x to i32) to i64), i64* undef, align 8 +; CHECK: adrp x{{[0-9]+}}, _x@GOTPAGE +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, _x@GOTPAGEOFF] +; CHECK-NEXT: and x{{[0-9]+}}, x{{[0-9]+}}, #0xffffffff +; CHECK-NEXT: str x{{[0-9]+}}, [x{{[0-9]+}}] + unreachable +} + +declare i64 @x(i32) optsize + +; Worth checking the Linux code is sensible too: only way to access +; the GOT is via a 64-bit load. Just loading wN is unacceptable +; (there's no ELF relocation to do that). 
+ +; CHECK-LINUX: adrp {{x[0-9]+}}, :got:x +; CHECK-LINUX: ldr {{x[0-9]+}}, [{{x[0-9]+}}, :got_lo12:x] diff --git a/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll b/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll new file mode 100644 index 0000000000..4b037db9c8 --- /dev/null +++ b/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios -verify-machineinstrs | FileCheck %s + +; LdStOpt bug created illegal instruction: +; %D1, %D2 = LDPSi %X0, 1 +; rdar://11512047 + +%0 = type opaque +%struct.CGRect = type { %struct.CGPoint, %struct.CGSize } +%struct.CGPoint = type { double, double } +%struct.CGSize = type { double, double } + +@"OBJC_IVAR_$_UIScreen._bounds" = external hidden global i64, section "__DATA, __objc_ivar", align 8 + +define hidden %struct.CGRect @t(%0* nocapture %self, i8* nocapture %_cmd) nounwind readonly optsize ssp { +entry: +; CHECK-LABEL: t: +; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}} + %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4 + %0 = bitcast %0* %self to i8* + %add.ptr = getelementptr inbounds i8* %0, i64 %ivar + %add.ptr10.0 = bitcast i8* %add.ptr to double* + %tmp11 = load double* %add.ptr10.0, align 8 + %add.ptr.sum = add i64 %ivar, 8 + %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum + %1 = bitcast i8* %add.ptr10.1 to double* + %tmp12 = load double* %1, align 8 + %add.ptr.sum17 = add i64 %ivar, 16 + %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum17 + %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double* + %tmp = load double* %add.ptr4.1.0, align 8 + %add.ptr4.1.sum = add i64 %ivar, 24 + %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %add.ptr4.1.sum + %2 = bitcast i8* %add.ptr4.1.1 to double* + %tmp5 = load double* %2, align 8 + %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0 + %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1 + %insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0 + %insert7 = insertvalue %struct.CGSize undef, double %tmp, 0 + %insert9 = insertvalue %struct.CGSize %insert7, double %tmp5, 1 + %insert3 = insertvalue %struct.CGRect %insert, %struct.CGSize %insert9, 1 + ret %struct.CGRect %insert3 +} + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} +!4 = metadata !{} diff --git a/test/CodeGen/ARM64/2012-06-06-FPToUI.ll b/test/CodeGen/ARM64/2012-06-06-FPToUI.ll new file mode 100644 index 0000000000..dda4ff5bad --- /dev/null +++ b/test/CodeGen/ARM64/2012-06-06-FPToUI.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=arm64 -O0 < %s | FileCheck %s +; RUN: llc -march=arm64 -O3 < %s | FileCheck %s + +@.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1 +@.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1 +@.str2 = private unnamed_addr constant [8 x i8] c"%f %lu\0A\00", align 1 +@.str3 = private unnamed_addr constant [7 x i8] c"%f %u\0A\00", align 1 + +define void @testDouble(double %d) ssp { +; CHECK: fcvtzu x{{.}}, d{{.}} +; CHECK: fcvtzu w{{.}}, d{{.}} +entry: + %d.addr = alloca double, align 8 + store double %d, double* %d.addr, align 8 + %0 = load double* %d.addr, align 8 + %1 = load double* %d.addr, align 8 + %conv = fptoui double %1 to i64 + 
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv) + %2 = load double* %d.addr, align 8 + %3 = load double* %d.addr, align 8 + %conv1 = fptoui double %3 to i32 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1) + ret void +} + +declare i32 @printf(i8*, ...) + +define void @testFloat(float %f) ssp { +; CHECK: fcvtzu x{{.}}, s{{.}} +; CHECK: fcvtzu w{{.}}, s{{.}} +entry: + %f.addr = alloca float, align 4 + store float %f, float* %f.addr, align 4 + %0 = load float* %f.addr, align 4 + %conv = fpext float %0 to double + %1 = load float* %f.addr, align 4 + %conv1 = fptoui float %1 to i64 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1) + %2 = load float* %f.addr, align 4 + %conv2 = fpext float %2 to double + %3 = load float* %f.addr, align 4 + %conv3 = fptoui float %3 to i32 + %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3) + ret void +} + +define i32 @main(i32 %argc, i8** %argv) ssp { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 0, i32* %retval + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + call void @testDouble(double 1.159198e+01) + call void @testFloat(float 0x40272F1800000000) + ret i32 0 +} + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} +!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} +!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} +!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} diff --git a/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll b/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll new file mode 100644 index 0000000000..55ecfb5d2b --- /dev/null +++ b/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios +; rdar://11849816 + +@shlib_path_substitutions = external hidden unnamed_addr global i8**, align 8 + +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone + +declare noalias i8* @xmalloc(i64) optsize + +declare i64 @strlen(i8* nocapture) nounwind readonly optsize + +declare i8* @__strcpy_chk(i8*, i8*, i64) nounwind optsize + +declare i8* @__strcat_chk(i8*, i8*, i64) nounwind optsize + +declare noalias i8* @xstrdup(i8*) optsize + +define i8* @dyld_fix_path(i8* %path) nounwind optsize ssp { +entry: + br i1 undef, label %if.end56, label %for.cond + +for.cond: ; preds = %entry + br i1 undef, label %for.cond10, label %for.body + +for.body: ; preds = %for.cond + unreachable + +for.cond10: ; preds = %for.cond + br i1 undef, label %if.end56, label %for.body14 + +for.body14: ; preds = %for.cond10 + %call22 = tail call i64 @strlen(i8* undef) nounwind optsize + %sext = shl i64 %call22, 32 + %conv30 = ashr exact i64 %sext, 32 + %add29 = sub i64 0, %conv30 + %sub = add i64 %add29, 0 + %add31 = shl i64 %sub, 32 + %sext59 = add i64 %add31, 4294967296 + %conv33 = ashr exact i64 %sext59, 32 + %call34 = tail call noalias i8* @xmalloc(i64 %conv33) nounwind optsize + br i1 undef, label %cond.false45, label %cond.true43 + +cond.true43: ; preds = %for.body14 + unreachable + +cond.false45: ; preds = %for.body14 + %add.ptr = getelementptr 
inbounds i8* %path, i64 %conv30 + unreachable + +if.end56: ; preds = %for.cond10, %entry + ret i8* null +} + +declare i32 @strncmp(i8* nocapture, i8* nocapture, i64) nounwind readonly optsize + +declare i8* @strcpy(i8*, i8* nocapture) nounwind diff --git a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll b/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll new file mode 100644 index 0000000000..b40a581d61 --- /dev/null +++ b/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +;FAST-LABEL: _Z9example25v: +;FAST: fcmgt.4s +;FAST: ret + +;CHECK-LABEL: _Z9example25v: +;CHECK: fcmgt.4s +;CHECK: ret + +define <4 x i32> @_Z9example25v( <4 x float> %N0, <4 x float> %N1) { + %A = fcmp olt <4 x float> %N0, %N1 + %B = zext <4 x i1> %A to <4 x i32> + ret <4 x i32> %B +} diff --git a/test/CodeGen/ARM64/2013-01-23-frem-crash.ll b/test/CodeGen/ARM64/2013-01-23-frem-crash.ll new file mode 100644 index 0000000000..94511243a4 --- /dev/null +++ b/test/CodeGen/ARM64/2013-01-23-frem-crash.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=arm64 +; Make sure we are not crashing on this test. + +define void @autogen_SD13158() { +entry: + %B26 = frem float 0.000000e+00, undef + br i1 undef, label %CF, label %CF77 + +CF: ; preds = %CF, %CF76 + store float %B26, float* undef + br i1 undef, label %CF, label %CF77 + +CF77: ; preds = %CF + ret void +} diff --git a/test/CodeGen/ARM64/2013-01-23-sext-crash.ll b/test/CodeGen/ARM64/2013-01-23-sext-crash.ll new file mode 100644 index 0000000000..404027bfd5 --- /dev/null +++ b/test/CodeGen/ARM64/2013-01-23-sext-crash.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=arm64 + +; Make sure we are not crashing on this test. 
+ +define void @autogen_SD12881() { +BB: + %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer + br label %CF + +CF: ; preds = %CF83, %CF, %BB + br i1 undef, label %CF, label %CF83 + +CF83: ; preds = %CF + %FC70 = sitofp <4 x i32> %B17 to <4 x double> + br label %CF +} + + +define void @autogen_SD12881_2() { +BB: + %B17 = ashr <4 x i32> zeroinitializer, zeroinitializer + br label %CF + +CF: ; preds = %CF83, %CF, %BB + br i1 undef, label %CF, label %CF83 + +CF83: ; preds = %CF + %FC70 = uitofp <4 x i32> %B17 to <4 x double> + br label %CF +} + +define void @_Z12my_example2bv() nounwind noinline ssp { +entry: + %0 = fptosi <2 x double> undef to <2 x i32> + store <2 x i32> %0, <2 x i32>* undef, align 8 + ret void +} diff --git a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll b/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll new file mode 100644 index 0000000000..70e745fc57 --- /dev/null +++ b/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple + +;CHECK-LABEL: Shuff: +;CHECK: tbl.8b +;CHECK: ret +define <8 x i8 > @Shuff(<8 x i8> %in, <8 x i8>* %out) nounwind ssp { + %value = shufflevector <8 x i8> %in, <8 x i8> zeroinitializer, <8 x i32> + ret <8 x i8> %value +} + + diff --git a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll new file mode 100644 index 0000000000..6397ac54d3 --- /dev/null +++ b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s +; +define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: bar: +; CHECK: add.2d v[[REG:[0-9]+]], v0, v1 +; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1 +; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1 + %add = add <2 x i64> %a, %b + %vgetq_lane = extractelement <2 x i64> %add, i32 0 + %vgetq_lane2 = extractelement <2 x i64> %b, i32 0 + %add3 = add i64 %vgetq_lane, %vgetq_lane2 + %sub = sub i64 %vgetq_lane, %vgetq_lane2 + %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0 + %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1 + ret <2 x i64> %vecinit8 +} + +define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: subdd_su64: +; CHECK: sub d0, d1, d0 +; CHECK-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %sub.i = sub nsw i64 %vecext1, %vecext + %retval = bitcast i64 %sub.i to double + ret double %retval +} + +define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vaddd_su64: +; CHECK: add d0, d1, d0 +; CHECK-NEXT: ret + %vecext = extractelement <2 x i64> %a, i32 0 + %vecext1 = extractelement <2 x i64> %b, i32 0 + %add.i = add nsw i64 %vecext1, %vecext + %retval = bitcast i64 %add.i to double + ret double %retval +} diff --git a/test/CodeGen/ARM64/aapcs.ll b/test/CodeGen/ARM64/aapcs.ll new file mode 100644 index 0000000000..27d2aa7b77 --- /dev/null +++ b/test/CodeGen/ARM64/aapcs.ll @@ -0,0 +1,86 @@ +; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s + +@var = global i32 0, align 4 + +define i128 @test_i128_align(i32, i128 %arg, i32 %after) { + store i32 %after, i32* @var, align 4 +; CHECK: str w4, [{{x[0-9]+}}, :lo12:var] + + ret i128 %arg +; CHECK: mov x0, x2 +; CHECK: mov x1, x3 +} + +@var64 = global i64 0, align 8 + + ; Check stack slots are 64-bit at all times. 
+define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, + i32 %int, i64 %long) { + ; Part of last store. Blasted scheduler. +; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32] + + %ext_bool = zext i1 %bool to i64 + store volatile i64 %ext_bool, i64* @var64, align 8 +; CHECK: ldr w[[EXT:[0-9]+]], [sp] +; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1 +; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64] + + %ext_char = zext i8 %char to i64 + store volatile i64 %ext_char, i64* @var64, align 8 +; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8] +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_short = zext i16 %short to i64 + store volatile i64 %ext_short, i64* @var64, align 8 +; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16] +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_int = zext i32 %int to i64 + store volatile i64 %ext_int, i64* @var64, align 8 +; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24] +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] + + store volatile i64 %long, i64* @var64, align 8 +; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64] + + ret void +} + +; Make sure the callee does extensions (in the absence of zext/sext +; keyword on args) while we're here. + +define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { + %ext_bool = zext i1 %bool to i64 + store volatile i64 %ext_bool, i64* @var64 +; CHECK: and [[EXT:x[0-9]+]], x0, #0x1 +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_char = sext i8 %char to i64 + store volatile i64 %ext_char, i64* @var64 +; CHECK: sxtb [[EXT:x[0-9]+]], x1 +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_short = zext i16 %short to i64 + store volatile i64 %ext_short, i64* @var64 +; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + %ext_int = zext i32 %int to i64 + store volatile i64 %ext_int, i64* @var64 +; CHECK: uxtw [[EXT:x[0-9]+]], x3 +; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] + + ret void +} + +declare void @variadic(i32 %a, ...) + + ; Under AAPCS variadic functions have the same calling convention as + ; others. The extra arguments should go in registers rather than on the stack. +define void @test_variadic() { + call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0) +; CHECK: fmov d0, #2.0 +; CHECK: orr x1, xzr, #0x1 +; CHECK: bl variadic + ret void +} diff --git a/test/CodeGen/ARM64/abi-varargs.ll b/test/CodeGen/ARM64/abi-varargs.ll new file mode 100644 index 0000000000..92db392cd0 --- /dev/null +++ b/test/CodeGen/ARM64/abi-varargs.ll @@ -0,0 +1,191 @@ +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +target triple = "arm64-apple-ios7.0.0" + +; rdar://13625505 +; Here we have 9 fixed integer arguments the 9th argument in on stack, the +; varargs start right after at 8-byte alignment. +define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) 
nounwind noinline ssp { +; CHECK-LABEL: fn9: +; 9th fixed argument +; CHECK: ldr {{w[0-9]+}}, [sp, #64] +; CHECK: add [[ARGS:x[0-9]+]], sp, #72 +; CHECK: add {{x[0-9]+}}, [[ARGS]], #8 +; First vararg +; CHECK: ldr {{w[0-9]+}}, [sp, #72] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8 +; Second vararg +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8 +; Third vararg +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %args = alloca i8*, align 8 + %a10 = alloca i32, align 4 + %a11 = alloca i32, align 4 + %a12 = alloca i32, align 4 + store i32 %a1, i32* %1, align 4 + store i32 %a2, i32* %2, align 4 + store i32 %a3, i32* %3, align 4 + store i32 %a4, i32* %4, align 4 + store i32 %a5, i32* %5, align 4 + store i32 %a6, i32* %6, align 4 + store i32 %a7, i32* %7, align 4 + store i32 %a8, i32* %8, align 4 + store i32 %a9, i32* %9, align 4 + %10 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %10) + %11 = va_arg i8** %args, i32 + store i32 %11, i32* %a10, align 4 + %12 = va_arg i8** %args, i32 + store i32 %12, i32* %a11, align 4 + %13 = va_arg i8** %args, i32 + store i32 %13, i32* %a12, align 4 + ret void +} + +declare void @llvm.va_start(i8*) nounwind + +define i32 @main() nounwind ssp { +; CHECK-LABEL: main: +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: str {{x[0-9]+}}, [sp, #8] +; CHECK: str {{w[0-9]+}}, [sp] + %a1 = alloca i32, align 4 + %a2 = alloca i32, align 4 + %a3 = alloca i32, align 4 + %a4 = alloca i32, align 4 + %a5 = alloca i32, align 4 + %a6 = alloca i32, align 4 + %a7 = alloca i32, align 4 + %a8 = alloca i32, align 4 + %a9 = alloca i32, align 4 + %a10 = alloca i32, align 4 + %a11 = alloca i32, align 4 + %a12 = alloca i32, align 4 + store i32 1, i32* %a1, align 4 + store i32 2, i32* %a2, align 4 + store i32 3, i32* %a3, align 4 + store i32 4, i32* %a4, align 4 + store i32 5, i32* %a5, align 4 + store i32 6, i32* %a6, align 4 + store i32 7, i32* %a7, align 4 + store i32 8, i32* %a8, align 4 + store i32 9, i32* %a9, align 4 + store i32 10, i32* %a10, align 4 + store i32 11, i32* %a11, align 4 + store i32 12, i32* %a12, align 4 + %1 = load i32* %a1, align 4 + %2 = load i32* %a2, align 4 + %3 = load i32* %a3, align 4 + %4 = load i32* %a4, align 4 + %5 = load i32* %a5, align 4 + %6 = load i32* %a6, align 4 + %7 = load i32* %a7, align 4 + %8 = load i32* %a8, align 4 + %9 = load i32* %a9, align 4 + %10 = load i32* %a10, align 4 + %11 = load i32* %a11, align 4 + %12 = load i32* %a12, align 4 + call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...)* @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12) + ret i32 0 +} + +;rdar://13668483 +@.str = private unnamed_addr constant [4 x i8] c"fmt\00", align 1 +define void @foo(i8* %fmt, ...) 
nounwind { +entry: +; CHECK-LABEL: foo: +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8 +; CHECK: ldr {{w[0-9]+}}, [sp, #48] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15 +; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 +; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] + %fmt.addr = alloca i8*, align 8 + %args = alloca i8*, align 8 + %vc = alloca i32, align 4 + %vv = alloca <4 x i32>, align 16 + store i8* %fmt, i8** %fmt.addr, align 8 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %0 = va_arg i8** %args, i32 + store i32 %0, i32* %vc, align 4 + %1 = va_arg i8** %args, <4 x i32> + store <4 x i32> %1, <4 x i32>* %vv, align 16 + ret void +} + +define void @bar(i32 %x, <4 x i32> %y) nounwind { +entry: +; CHECK-LABEL: bar: +; CHECK: str {{q[0-9]+}}, [sp, #16] +; CHECK: str {{x[0-9]+}}, [sp] + %x.addr = alloca i32, align 4 + %y.addr = alloca <4 x i32>, align 16 + store i32 %x, i32* %x.addr, align 4 + store <4 x i32> %y, <4 x i32>* %y.addr, align 16 + %0 = load i32* %x.addr, align 4 + %1 = load <4 x i32>* %y.addr, align 16 + call void (i8*, ...)* @foo(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1) + ret void +} + +; rdar://13668927 +; When passing 16-byte aligned small structs as vararg, make sure the caller +; side is 16-byte aligned on stack. +%struct.s41 = type { i32, i16, i32, i16 } +define void @foo2(i8* %fmt, ...) nounwind { +entry: +; CHECK-LABEL: foo2: +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8 +; CHECK: ldr {{w[0-9]+}}, [sp, #48] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15 +; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 +; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] + %fmt.addr = alloca i8*, align 8 + %args = alloca i8*, align 8 + %vc = alloca i32, align 4 + %vs = alloca %struct.s41, align 16 + store i8* %fmt, i8** %fmt.addr, align 8 + %args1 = bitcast i8** %args to i8* + call void @llvm.va_start(i8* %args1) + %0 = va_arg i8** %args, i32 + store i32 %0, i32* %vc, align 4 + %ap.cur = load i8** %args + %1 = getelementptr i8* %ap.cur, i32 15 + %2 = ptrtoint i8* %1 to i64 + %3 = and i64 %2, -16 + %ap.align = inttoptr i64 %3 to i8* + %ap.next = getelementptr i8* %ap.align, i32 16 + store i8* %ap.next, i8** %args + %4 = bitcast i8* %ap.align to %struct.s41* + %5 = bitcast %struct.s41* %vs to i8* + %6 = bitcast %struct.s41* %4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false) + ret void +} +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define void @bar2(i32 %x, i128 %s41.coerce) nounwind { +entry: +; CHECK-LABEL: bar2: +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: str {{x[0-9]+}}, [sp] + %x.addr = alloca i32, align 4 + %s41 = alloca %struct.s41, align 16 + store i32 %x, i32* %x.addr, align 4 + %0 = bitcast %struct.s41* %s41 to i128* + store i128 %s41.coerce, i128* %0, align 1 + %1 = load i32* %x.addr, align 4 + %2 = bitcast %struct.s41* %s41 to i128* + %3 = load i128* %2, align 1 + call void (i8*, ...)* @foo2(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3) + ret void +} diff --git a/test/CodeGen/ARM64/abi.ll b/test/CodeGen/ARM64/abi.ll new file mode 100644 index 0000000000..a7693b6ba9 --- /dev/null +++ b/test/CodeGen/ARM64/abi.ll @@ -0,0 +1,236 @@ +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s +target triple = "arm64-apple-darwin" + +; rdar://9932559 +define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 
signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline {
+entry:
+; CHECK-LABEL: i8i16callee:
+; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
+; They are i8, i16, i8 and i8.
+; CHECK: ldrsb {{w[0-9]+}}, [sp, #5]
+; CHECK: ldrsh {{w[0-9]+}}, [sp, #2]
+; CHECK: ldrsb {{w[0-9]+}}, [sp]
+; CHECK: ldrsb {{w[0-9]+}}, [sp, #4]
+; FAST-LABEL: i8i16callee:
+; FAST: ldrb {{w[0-9]+}}, [sp, #5]
+; FAST: ldrb {{w[0-9]+}}, [sp, #4]
+; FAST: ldrh {{w[0-9]+}}, [sp, #2]
+; FAST: ldrb {{w[0-9]+}}, [sp]
+  %conv = sext i8 %a4 to i64
+  %conv3 = sext i16 %a5 to i64
+  %conv8 = sext i8 %b1 to i64
+  %conv9 = sext i16 %b2 to i64
+  %conv11 = sext i8 %b3 to i64
+  %conv13 = sext i8 %b4 to i64
+  %add10 = add i64 %a2, %a1
+  %add12 = add i64 %add10, %a3
+  %add14 = add i64 %add12, %conv
+  %add = add i64 %add14, %conv3
+  %add1 = add i64 %add, %a6
+  %add2 = add i64 %add1, %a7
+  %add4 = add i64 %add2, %a8
+  %add5 = add i64 %add4, %conv8
+  %add6 = add i64 %add5, %conv9
+  %add7 = add i64 %add6, %conv11
+  %add15 = add i64 %add7, %conv13
+  %sext = shl i64 %add15, 32
+  %conv17 = ashr exact i64 %sext, 32
+  ret i64 %conv17
+}
+
+define i32 @i8i16caller() nounwind readnone {
+entry:
+; CHECK: i8i16caller
+; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
+; They are i8, i16, i8 and i8.
+; CHECK: strb {{w[0-9]+}}, [sp, #5]
+; CHECK: strb {{w[0-9]+}}, [sp, #4]
+; CHECK: strh {{w[0-9]+}}, [sp, #2]
+; CHECK: strb {{w[0-9]+}}, [sp]
+; CHECK: bl
+; FAST: i8i16caller
+; FAST: strb {{w[0-9]+}}, [sp]
+; FAST: strh {{w[0-9]+}}, [sp, #2]
+; FAST: strb {{w[0-9]+}}, [sp, #4]
+; FAST: strb {{w[0-9]+}}, [sp, #5]
+; FAST: bl
+  %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 signext 97, i16 signext 98, i8 signext 99, i8 signext 100)
+  %conv = trunc i64 %call to i32
+  ret i32 %conv
+}
+
+; rdar://12651543
+define double @circle_center([2 x float] %a) nounwind ssp {
+  %call = tail call double @ext([2 x float] %a) nounwind
+; CHECK: circle_center
+; CHECK: bl
+  ret double %call
+}
+declare double @ext([2 x float])
+
+; rdar://12656141
+; A 16-byte vector should be 16-byte aligned when passed on the stack.
+; A double argument will be passed on the stack, so the vector should be at sp+16.
+define double @fixed_4i(<4 x i32>* nocapture %in) nounwind {
+entry:
+; CHECK: fixed_4i
+; CHECK: str [[REG_1:q[0-9]+]], [sp, #16]
+; FAST: fixed_4i
+; FAST: mov x[[ADDR:[0-9]+]], sp
+; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
+  %0 = load <4 x i32>* %in, align 16
+  %call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3)
+  ret double %call
+}
+declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, double, <4 x i32>, i8 signext)
+
+; rdar://12695237
+; d8 at sp, i in register w0.
+@g_d = common global double 0.000000e+00, align 8 +define void @test1(float %f1, double %d1, double %d2, double %d3, double %d4, + double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind ssp { +entry: +; CHECK: test1 +; CHECK: ldr [[REG_1:d[0-9]+]], [sp] +; CHECK: scvtf [[REG_2:s[0-9]+]], w0 +; CHECK: fadd s0, [[REG_2]], s0 + %conv = sitofp i32 %i to float + %add = fadd float %conv, %f1 + %conv1 = fpext float %add to double + %add2 = fadd double %conv1, %d7 + %add3 = fadd double %add2, %d8 + store double %add3, double* @g_d, align 8 + ret void +} + +; i9 at sp, d1 in register s0. +define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp { +entry: +; CHECK: test2 +; CHECK: scvtf [[REG_2:s[0-9]+]], w0 +; CHECK: fadd s0, [[REG_2]], s0 +; CHECK: ldr [[REG_1:s[0-9]+]], [sp] + %conv = sitofp i32 %i1 to float + %add = fadd float %conv, %d1 + %conv1 = fpext float %add to double + %conv2 = sitofp i32 %i8 to double + %add3 = fadd double %conv2, %conv1 + %conv4 = sitofp i32 %i9 to double + %add5 = fadd double %conv4, %add3 + store double %add5, double* @g_d, align 8 + ret void +} + +; rdar://12648441 +; Check alignment on stack for v64, f64, i64, f32, i32. +define double @test3(<2 x i32>* nocapture %in) nounwind { +entry: +; CHECK: test3 +; CHECK: str [[REG_1:d[0-9]+]], [sp, #8] +; FAST: test3 +; FAST: mov x[[ADDR:[0-9]+]], sp +; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8] + %0 = load <2 x i32>* %in, align 8 + %call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32> %0, + <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, + <2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3) + ret double %call +} +declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, + <2 x i32>, <2 x i32>, <2 x i32>, float, <2 x i32>, i8 signext) + +define double @test4(double* nocapture %in) nounwind { +entry: +; CHECK: test4 +; CHECK: str [[REG_1:d[0-9]+]], [sp, #8] +; CHECK: str [[REG_2:w[0-9]+]], [sp] +; CHECK: orr w0, wzr, #0x3 + %0 = load double* %in, align 8 + %call = tail call double @args_f64(double 3.000000e+00, double %0, double %0, + double %0, double %0, double %0, double %0, double %0, + float 3.000000e+00, double %0, i8 signext 3) + ret double %call +} +declare double @args_f64(double, double, double, double, double, double, double, + double, float, double, i8 signext) + +define i64 @test5(i64* nocapture %in) nounwind { +entry: +; CHECK: test5 +; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16] +; CHECK: str [[REG_1:x[0-9]+]], [sp, #8] +; CHECK: str [[REG_2:w[0-9]+]], [sp] + %0 = load i64* %in, align 8 + %call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64 %0, i64 %0, + i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3) + ret i64 %call +} +declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, + i8 signext) + +define i32 @test6(float* nocapture %in) nounwind { +entry: +; CHECK: test6 +; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8] +; CHECK: str [[REG_1:s[0-9]+]], [sp, #4] +; CHECK: strh [[REG_3:w[0-9]+]], [sp] + %0 = load float* %in, align 4 + %call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, + float 6.0, float 7.0, float 8.0, i16 signext 3, float %0, + i8 signext 3) + ret i32 %call +} +declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32, + float, float, float, float, float, float, float, float, + i16 signext, float, i8 signext) + +define i32 @test7(i32* 
nocapture %in) nounwind { +entry: +; CHECK: test7 +; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8] +; CHECK: str [[REG_1:w[0-9]+]], [sp, #4] +; CHECK: strh [[REG_3:w[0-9]+]], [sp] + %0 = load i32* %in, align 4 + %call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32 %0, i32 %0, + i32 %0, i32 %0, i16 signext 3, i32 %0, i8 signext 4) + ret i32 %call +} +declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32, + i8 signext) + +define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind { +entry: +; CHECK: test8 +; CHECK: strb {{w[0-9]+}}, [sp, #3] +; CHECK: strb wzr, [sp, #2] +; CHECK: strb {{w[0-9]+}}, [sp, #1] +; CHECK: strb wzr, [sp] +; CHECK: bl +; FAST: test8 +; FAST: strb {{w[0-9]+}}, [sp] +; FAST: strb {{w[0-9]+}}, [sp, #1] +; FAST: strb {{w[0-9]+}}, [sp, #2] +; FAST: strb {{w[0-9]+}}, [sp, #3] +; FAST: bl + tail call void @args_i1(i1 zeroext false, i1 zeroext true, i1 zeroext false, + i1 zeroext true, i1 zeroext false, i1 zeroext true, + i1 zeroext false, i1 zeroext true, i1 zeroext false, + i1 zeroext true, i1 zeroext false, i1 zeroext true) + ret i32 0 +} + +declare void @args_i1(i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext, + i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext, + i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext) + +define i32 @i1_stack_incoming(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, + i64 %g, i64 %h, i64 %i, i1 zeroext %j) { +; CHECK-LABEL: i1_stack_incoming: +; CHECK: ldrb w0, [sp, #8] +; CHECK: ret + %v = zext i1 %j to i32 + ret i32 %v +} diff --git a/test/CodeGen/ARM64/abi_align.ll b/test/CodeGen/ARM64/abi_align.ll new file mode 100644 index 0000000000..61c661e48f --- /dev/null +++ b/test/CodeGen/ARM64/abi_align.ll @@ -0,0 +1,529 @@ +; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s +target triple = "arm64-apple-darwin" + +; rdar://12648441 +; Generated from arm64-arguments.c with -O2. +; Test passing structs with size < 8, < 16 and > 16 +; with alignment of 16 and without + +; Structs with size < 8 +%struct.s38 = type { i32, i16 } +; With alignment of 16, the size will be padded to multiple of 16 bytes. 
+%struct.s39 = type { i32, i16, [10 x i8] } +; Structs with size < 16 +%struct.s40 = type { i32, i16, i32, i16 } +%struct.s41 = type { i32, i16, i32, i16 } +; Structs with size > 16 +%struct.s42 = type { i32, i16, i32, i16, i32, i16 } +%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] } + +@g38 = common global %struct.s38 zeroinitializer, align 4 +@g38_2 = common global %struct.s38 zeroinitializer, align 4 +@g39 = common global %struct.s39 zeroinitializer, align 16 +@g39_2 = common global %struct.s39 zeroinitializer, align 16 +@g40 = common global %struct.s40 zeroinitializer, align 4 +@g40_2 = common global %struct.s40 zeroinitializer, align 4 +@g41 = common global %struct.s41 zeroinitializer, align 16 +@g41_2 = common global %struct.s41 zeroinitializer, align 16 +@g42 = common global %struct.s42 zeroinitializer, align 4 +@g42_2 = common global %struct.s42 zeroinitializer, align 4 +@g43 = common global %struct.s43 zeroinitializer, align 16 +@g43_2 = common global %struct.s43 zeroinitializer, align 16 + +; structs with size < 8 bytes, passed via i64 in x1 and x2 +define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 { +entry: +; CHECK: f38 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w2 + %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32 + %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32 + %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32 + %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32 + %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16 + %sext = trunc i64 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16 + %sext10 = trunc i64 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller38() #1 { +entry: +; CHECK: caller38 +; CHECK: ldr x1, +; CHECK: ldr x2, + %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4 + %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 + %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5 + ret i32 %call +} + +declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0 + +; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16] +; i9 at [sp] +define i32 @caller38_stack() #1 { +entry: +; CHECK: caller38_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4 + %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4 + %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, i64 %0, i64 %1) #5 + ret i32 %call +} + +; structs with size < 8 bytes, alignment of 16 +; passed via i128 in x1 and x3 +define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { +entry: +; CHECK: f39 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w3 + %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 + %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 + %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 + %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 + %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 + %sext = trunc i128 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 + 
%sext10 = trunc i128 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller39() #1 { +entry: +; CHECK: caller39 +; CHECK: ldp x1, x2, +; CHECK: ldp x3, x4, + %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16 + %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 + %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5 + ret i32 %call +} + +declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0 + +; structs with size < 8 bytes, alignment 16 +; passed on stack at [sp+16] and [sp+32] +define i32 @caller39_stack() #1 { +entry: +; CHECK: caller39_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16 + %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16 + %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, i128 %0, i128 %1) #5 + ret i32 %call +} + +; structs with size < 16 bytes +; passed via i128 in x1 and x3 +define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 { +entry: +; CHECK: f40 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w3 + %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0 + %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0 + %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32 + %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32 + %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32 + %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16 + %sext = trunc i64 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32 + %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16 + %sext10 = trunc i64 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller40() #1 { +entry: +; CHECK: caller40 +; CHECK: ldp x1, x2, +; CHECK: ldp x3, x4, + %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 + %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 + %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5 + ret i32 %call +} + +declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 + +; structs with size < 16 bytes +; passed on stack at [sp+8] and [sp+24] +define i32 @caller40_stack() #1 { +entry: +; CHECK: caller40_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4 + %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4 + %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5 + ret i32 %call +} + +; structs with size < 16 bytes, alignment of 16 +; passed 
via i128 in x1 and x3 +define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 { +entry: +; CHECK: f41 +; CHECK: add w[[A:[0-9]+]], w1, w0 +; CHECK: add {{w[0-9]+}}, w[[A]], w3 + %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32 + %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32 + %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32 + %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32 + %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16 + %sext = trunc i128 %sext8 to i32 + %conv = ashr exact i32 %sext, 16 + %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16 + %sext10 = trunc i128 %sext1011 to i32 + %conv6 = ashr exact i32 %sext10, 16 + %add = add i32 %s1.sroa.0.0.extract.trunc, %i + %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller41() #1 { +entry: +; CHECK: caller41 +; CHECK: ldp x1, x2, +; CHECK: ldp x3, x4, + %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 + %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 + %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5 + ret i32 %call +} + +declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0 + +; structs with size < 16 bytes, alignment of 16 +; passed on stack at [sp+16] and [sp+32] +define i32 @caller41_stack() #1 { +entry: +; CHECK: caller41_stack +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 + %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16 + %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, + i32 7, i32 8, i32 9, i128 %0, i128 %1) #5 + ret i32 %call +} + +; structs with size of 22 bytes, passed indirectly in x1 and x2 +define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 { +entry: +; CHECK: f42 +; CHECK: ldr w[[A:[0-9]+]], [x1] +; CHECK: ldr w[[B:[0-9]+]], [x2] +; CHECK: add w[[C:[0-9]+]], w[[A]], w0 +; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]] +; FAST: f42 +; FAST: ldr w[[A:[0-9]+]], [x1] +; FAST: ldr w[[B:[0-9]+]], [x2] +; FAST: add w[[C:[0-9]+]], w[[A]], w0 +; FAST: add {{w[0-9]+}}, w[[C]], w[[B]] + %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0 + %0 = load i32* %i1, align 4, !tbaa !0 + %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0 + %1 = load i32* %i2, align 4, !tbaa !0 + %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1 + %2 = load i16* %s, align 2, !tbaa !3 + %conv = sext i16 %2 to i32 + %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1 + %3 = load i16* %s5, align 2, !tbaa !3 + %conv6 = sext i16 %3 to i32 + %add = add i32 %0, %i + %add3 = add i32 %add, %1 + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +; For s1, we allocate a 22-byte space, pass its address via x1 +define i32 @caller42() #3 { +entry: +; CHECK: caller42 +; CHECK: str {{x[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK: str {{q[0-9]+}}, [sp] +; CHECK: add x1, sp, #32 +; CHECK: mov x2, sp +; Space for s1 is allocated at sp+32 +; Space for s2 is allocated at sp + +; FAST: caller42 +; FAST: sub sp, sp, #96 +; Space for s1 is allocated at fp-24 = sp+72 +; Space for s2 is allocated at sp+48 +; FAST: sub x[[A:[0-9]+]], fp, #24 +; FAST: 
add x[[A:[0-9]+]], sp, #48 +; Call memcpy with size = 24 (0x18) +; FAST: orr {{x[0-9]+}}, xzr, #0x18 + %tmp = alloca %struct.s42, align 4 + %tmp1 = alloca %struct.s42, align 4 + %0 = bitcast %struct.s42* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s42* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5 + ret i32 %call +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4 + +declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1, + %struct.s42* nocapture %s2) #2 + +define i32 @caller42_stack() #3 { +entry: +; CHECK: caller42_stack +; CHECK: mov fp, sp +; CHECK: sub sp, sp, #96 +; CHECK: stur {{x[0-9]+}}, [fp, #-16] +; CHECK: stur {{q[0-9]+}}, [fp, #-32] +; CHECK: str {{x[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; Space for s1 is allocated at fp-32 = sp+64 +; Space for s2 is allocated at sp+32 +; CHECK: add x[[B:[0-9]+]], sp, #32 +; CHECK: str x[[B]], [sp, #16] +; CHECK: sub x[[A:[0-9]+]], fp, #32 +; Address of s1 is passed on stack at sp+8 +; CHECK: str x[[A]], [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + +; FAST: caller42_stack +; Space for s1 is allocated at fp-24 +; Space for s2 is allocated at fp-48 +; FAST: sub x[[A:[0-9]+]], fp, #24 +; FAST: sub x[[B:[0-9]+]], fp, #48 +; Call memcpy with size = 24 (0x18) +; FAST: orr {{x[0-9]+}}, xzr, #0x18 +; FAST: str {{w[0-9]+}}, [sp] +; Address of s1 is passed on stack at sp+8 +; FAST: str {{x[0-9]+}}, [sp, #8] +; FAST: str {{x[0-9]+}}, [sp, #16] + %tmp = alloca %struct.s42, align 4 + %tmp1 = alloca %struct.s42, align 4 + %0 = bitcast %struct.s42* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s42* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, + i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5 + ret i32 %call +} + +; structs with size of 22 bytes, alignment of 16 +; passed indirectly in x1 and x2 +define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 { +entry: +; CHECK: f43 +; CHECK: ldr w[[A:[0-9]+]], [x1] +; CHECK: ldr w[[B:[0-9]+]], [x2] +; CHECK: add w[[C:[0-9]+]], w[[A]], w0 +; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]] +; FAST: f43 +; FAST: ldr w[[A:[0-9]+]], [x1] +; FAST: ldr w[[B:[0-9]+]], [x2] +; FAST: add w[[C:[0-9]+]], w[[A]], w0 +; FAST: add {{w[0-9]+}}, w[[C]], w[[B]] + %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0 + %0 = load i32* %i1, align 4, !tbaa !0 + %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0 + %1 = load i32* %i2, align 4, !tbaa !0 + %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1 + %2 = load i16* %s, align 2, !tbaa !3 + %conv = sext i16 %2 to i32 + %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1 + %3 = load i16* %s5, align 2, !tbaa !3 + %conv6 = sext i16 %3 to i32 + %add = add i32 %0, %i + %add3 = add i32 %add, %1 + %add4 = add i32 %add3, %conv + %add7 = add i32 %add4, %conv6 + ret i32 %add7 +} + +define i32 @caller43() 
#3 { +entry: +; CHECK: caller43 +; CHECK: str {{q[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK: str {{q[0-9]+}}, [sp, #16] +; CHECK: str {{q[0-9]+}}, [sp] +; CHECK: add x1, sp, #32 +; CHECK: mov x2, sp +; Space for s1 is allocated at sp+32 +; Space for s2 is allocated at sp + +; FAST: caller43 +; FAST: mov fp, sp +; Space for s1 is allocated at sp+32 +; Space for s2 is allocated at sp +; FAST: add x1, sp, #32 +; FAST: mov x2, sp +; FAST: str {{x[0-9]+}}, [sp, #32] +; FAST: str {{x[0-9]+}}, [sp, #40] +; FAST: str {{x[0-9]+}}, [sp, #48] +; FAST: str {{x[0-9]+}}, [sp, #56] +; FAST: str {{x[0-9]+}}, [sp] +; FAST: str {{x[0-9]+}}, [sp, #8] +; FAST: str {{x[0-9]+}}, [sp, #16] +; FAST: str {{x[0-9]+}}, [sp, #24] + %tmp = alloca %struct.s43, align 16 + %tmp1 = alloca %struct.s43, align 16 + %0 = bitcast %struct.s43* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s43* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5 + ret i32 %call +} + +declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, + i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1, + %struct.s43* nocapture %s2) #2 + +define i32 @caller43_stack() #3 { +entry: +; CHECK: caller43_stack +; CHECK: mov fp, sp +; CHECK: sub sp, sp, #96 +; CHECK: stur {{q[0-9]+}}, [fp, #-16] +; CHECK: stur {{q[0-9]+}}, [fp, #-32] +; CHECK: str {{q[0-9]+}}, [sp, #48] +; CHECK: str {{q[0-9]+}}, [sp, #32] +; Space for s1 is allocated at fp-32 = sp+64 +; Space for s2 is allocated at sp+32 +; CHECK: add x[[B:[0-9]+]], sp, #32 +; CHECK: str x[[B]], [sp, #16] +; CHECK: sub x[[A:[0-9]+]], fp, #32 +; Address of s1 is passed on stack at sp+8 +; CHECK: str x[[A]], [sp, #8] +; CHECK: movz w[[C:[0-9]+]], #9 +; CHECK: str w[[C]], [sp] + +; FAST: caller43_stack +; FAST: sub sp, sp, #96 +; Space for s1 is allocated at fp-32 = sp+64 +; Space for s2 is allocated at sp+32 +; FAST: sub x[[A:[0-9]+]], fp, #32 +; FAST: add x[[B:[0-9]+]], sp, #32 +; FAST: stur {{x[0-9]+}}, [fp, #-32] +; FAST: stur {{x[0-9]+}}, [fp, #-24] +; FAST: stur {{x[0-9]+}}, [fp, #-16] +; FAST: stur {{x[0-9]+}}, [fp, #-8] +; FAST: str {{x[0-9]+}}, [sp, #32] +; FAST: str {{x[0-9]+}}, [sp, #40] +; FAST: str {{x[0-9]+}}, [sp, #48] +; FAST: str {{x[0-9]+}}, [sp, #56] +; FAST: str {{w[0-9]+}}, [sp] +; Address of s1 is passed on stack at sp+8 +; FAST: str {{x[0-9]+}}, [sp, #8] +; FAST: str {{x[0-9]+}}, [sp, #16] + %tmp = alloca %struct.s43, align 16 + %tmp1 = alloca %struct.s43, align 16 + %0 = bitcast %struct.s43* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %1 = bitcast %struct.s43* %tmp1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, + i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5 + ret i32 %call +} + +; rdar://13668927 +; Check that we don't split an i128. +declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, + i32 %i6, i32 %i7, i128 %s1, i32 %i8) + +define i32 @i128_split() { +entry: +; CHECK: i128_split +; "i128 %0" should be on stack at [sp]. +; "i32 8" should be on stack at [sp, #16]. 
+; CHECK: str {{w[0-9]+}}, [sp, #16] +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp] +; FAST: i128_split +; FAST: mov x[[ADDR:[0-9]+]], sp +; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16] +; FAST: stp {{x[0-9]+}}, {{x[0-9]+}}, [x[[ADDR]]] + %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16 + %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i128 %0, i32 8) #5 + ret i32 %call +} + +declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, + i32 %i6, i32 %i7, i64 %s1, i32 %i8) + +define i32 @i64_split() { +entry: +; CHECK: i64_split +; "i64 %0" should be in register x7. +; "i32 8" should be on stack at [sp]. +; CHECK: ldr x7, [{{x[0-9]+}}] +; CHECK: str {{w[0-9]+}}, [sp] +; FAST: i64_split +; FAST: ldr x7, [{{x[0-9]+}}] +; FAST: str {{w[0-9]+}}, [sp] + %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16 + %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5, + i32 6, i32 7, i64 %0, i32 8) #5 + ret i32 %call +} + +attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #4 = { nounwind } +attributes #5 = { nobuiltin } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} +!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3} diff --git a/test/CodeGen/ARM64/addp.ll b/test/CodeGen/ARM64/addp.ll new file mode 100644 index 0000000000..8283a0005c --- /dev/null +++ b/test/CodeGen/ARM64/addp.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +define double @foo(<2 x double> %a) nounwind { +; CHECK-LABEL: foo: +; CHECK: faddp.2d d0, v0 +; CHECK-NEXT: ret + %lane0.i = extractelement <2 x double> %a, i32 0 + %lane1.i = extractelement <2 x double> %a, i32 1 + %vpaddd.i = fadd double %lane0.i, %lane1.i + ret double %vpaddd.i +} + +define i64 @foo0(<2 x i64> %a) nounwind { +; CHECK-LABEL: foo0: +; CHECK: addp.2d d0, v0 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret + %lane0.i = extractelement <2 x i64> %a, i32 0 + %lane1.i = extractelement <2 x i64> %a, i32 1 + %vpaddd.i = add i64 %lane0.i, %lane1.i + ret i64 %vpaddd.i +} + +define float @foo1(<2 x float> %a) nounwind { +; CHECK-LABEL: foo1: +; CHECK: faddp.2s +; CHECK-NEXT: ret + %lane0.i = extractelement <2 x float> %a, i32 0 + %lane1.i = extractelement <2 x float> %a, i32 1 + %vpaddd.i = fadd float %lane0.i, %lane1.i + ret float %vpaddd.i +} diff --git a/test/CodeGen/ARM64/addr-mode-folding.ll b/test/CodeGen/ARM64/addr-mode-folding.ll new file mode 100644 index 0000000000..dff2331d29 --- /dev/null +++ b/test/CodeGen/ARM64/addr-mode-folding.ll @@ -0,0 +1,171 @@ +; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s +; + +@block = common global i8* null, align 8 + +define i32 @fct(i32 %i1, i32 %i2) { +; CHECK: @fct +; Sign extension is used more than once, thus it should not be folded. 
+; CodeGenPrepare is not sharing the sext across uses, thus this is folded because
+; of that.
+; _CHECK-NOT_: , sxtw]
+entry:
+  %idxprom = sext i32 %i1 to i64
+  %0 = load i8** @block, align 8
+  %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
+  %1 = load i8* %arrayidx, align 1
+  %idxprom1 = sext i32 %i2 to i64
+  %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
+  %2 = load i8* %arrayidx2, align 1
+  %cmp = icmp eq i8 %1, %2
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %cmp7 = icmp ugt i8 %1, %2
+  %conv8 = zext i1 %cmp7 to i32
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %inc = add nsw i32 %i1, 1
+  %inc9 = add nsw i32 %i2, 1
+  %idxprom10 = sext i32 %inc to i64
+  %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
+  %3 = load i8* %arrayidx11, align 1
+  %idxprom12 = sext i32 %inc9 to i64
+  %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
+  %4 = load i8* %arrayidx13, align 1
+  %cmp16 = icmp eq i8 %3, %4
+  br i1 %cmp16, label %if.end23, label %if.then18
+
+if.then18:                                        ; preds = %if.end
+  %cmp21 = icmp ugt i8 %3, %4
+  %conv22 = zext i1 %cmp21 to i32
+  br label %return
+
+if.end23:                                         ; preds = %if.end
+  %inc24 = add nsw i32 %i1, 2
+  %inc25 = add nsw i32 %i2, 2
+  %idxprom26 = sext i32 %inc24 to i64
+  %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
+  %5 = load i8* %arrayidx27, align 1
+  %idxprom28 = sext i32 %inc25 to i64
+  %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
+  %6 = load i8* %arrayidx29, align 1
+  %cmp32 = icmp eq i8 %5, %6
+  br i1 %cmp32, label %return, label %if.then34
+
+if.then34:                                        ; preds = %if.end23
+  %cmp37 = icmp ugt i8 %5, %6
+  %conv38 = zext i1 %cmp37 to i32
+  br label %return
+
+return:                                           ; preds = %if.end23, %if.then34, %if.then18, %if.then
+  %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ]
+  ret i32 %retval.0
+}
+
+define i32 @fct1(i32 %i1, i32 %i2) optsize {
+; CHECK: @fct1
+; Addressing modes are folded when optimizing for code size.
+; CHECK: , sxtw] +; CHECK: , sxtw] +entry: + %idxprom = sext i32 %i1 to i64 + %0 = load i8** @block, align 8 + %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom + %1 = load i8* %arrayidx, align 1 + %idxprom1 = sext i32 %i2 to i64 + %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1 + %2 = load i8* %arrayidx2, align 1 + %cmp = icmp eq i8 %1, %2 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %cmp7 = icmp ugt i8 %1, %2 + %conv8 = zext i1 %cmp7 to i32 + br label %return + +if.end: ; preds = %entry + %inc = add nsw i32 %i1, 1 + %inc9 = add nsw i32 %i2, 1 + %idxprom10 = sext i32 %inc to i64 + %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10 + %3 = load i8* %arrayidx11, align 1 + %idxprom12 = sext i32 %inc9 to i64 + %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12 + %4 = load i8* %arrayidx13, align 1 + %cmp16 = icmp eq i8 %3, %4 + br i1 %cmp16, label %if.end23, label %if.then18 + +if.then18: ; preds = %if.end + %cmp21 = icmp ugt i8 %3, %4 + %conv22 = zext i1 %cmp21 to i32 + br label %return + +if.end23: ; preds = %if.end + %inc24 = add nsw i32 %i1, 2 + %inc25 = add nsw i32 %i2, 2 + %idxprom26 = sext i32 %inc24 to i64 + %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26 + %5 = load i8* %arrayidx27, align 1 + %idxprom28 = sext i32 %inc25 to i64 + %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28 + %6 = load i8* %arrayidx29, align 1 + %cmp32 = icmp eq i8 %5, %6 + br i1 %cmp32, label %return, label %if.then34 + +if.then34: ; preds = %if.end23 + %cmp37 = icmp ugt i8 %5, %6 + %conv38 = zext i1 %cmp37 to i32 + br label %return + +return: ; preds = %if.end23, %if.then34, %if.then18, %if.then + %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ] + ret i32 %retval.0 +} + +; CHECK: @test +; CHECK-NOT: , uxtw #2] +define i32 @test(i32* %array, i8 zeroext %c, i32 %arg) { +entry: + %conv = zext i8 %c to i32 + %add = sub i32 0, %arg + %tobool = icmp eq i32 %conv, %add + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i8 %c to i64 + %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom + %0 = load volatile i32* %arrayidx, align 4 + %1 = load volatile i32* %arrayidx, align 4 + %add3 = add nsw i32 %1, %0 + br label %if.end + +if.end: ; preds = %entry, %if.then + %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ] + ret i32 %res.0 +} + + +; CHECK: @test2 +; CHECK: , uxtw #2] +; CHECK: , uxtw #2] +define i32 @test2(i32* %array, i8 zeroext %c, i32 %arg) optsize { +entry: + %conv = zext i8 %c to i32 + %add = sub i32 0, %arg + %tobool = icmp eq i32 %conv, %add + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i8 %c to i64 + %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom + %0 = load volatile i32* %arrayidx, align 4 + %1 = load volatile i32* %arrayidx, align 4 + %add3 = add nsw i32 %1, %0 + br label %if.end + +if.end: ; preds = %entry, %if.then + %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ] + ret i32 %res.0 +} diff --git a/test/CodeGen/ARM64/addr-type-promotion.ll b/test/CodeGen/ARM64/addr-type-promotion.ll new file mode 100644 index 0000000000..0677603473 --- /dev/null +++ b/test/CodeGen/ARM64/addr-type-promotion.ll @@ -0,0 +1,82 @@ +; RUN: llc -march arm64 < %s | FileCheck %s +; rdar://13452552 +; ModuleID = 'reduced_test.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" 
+target triple = "arm64-apple-ios3.0.0" + +@block = common global i8* null, align 8 + +define zeroext i8 @fullGtU(i32 %i1, i32 %i2) { +; CHECK: fullGtU +; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE +; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF] +; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]] +; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], x0, sxtw] +; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], x1, sxtw] +; CHECK-NEXT cmp [[BLOCKVAL1]], [[BLOCKVAL2]] +; CHECK-NEXT b.ne +; Next BB +; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw +; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw +; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1] +; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1] +; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]] +; CHECK-NEXT: b.ne +; Next BB +; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2] +; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2] +; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]] +entry: + %idxprom = sext i32 %i1 to i64 + %tmp = load i8** @block, align 8 + %arrayidx = getelementptr inbounds i8* %tmp, i64 %idxprom + %tmp1 = load i8* %arrayidx, align 1 + %idxprom1 = sext i32 %i2 to i64 + %arrayidx2 = getelementptr inbounds i8* %tmp, i64 %idxprom1 + %tmp2 = load i8* %arrayidx2, align 1 + %cmp = icmp eq i8 %tmp1, %tmp2 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %cmp7 = icmp ugt i8 %tmp1, %tmp2 + %conv9 = zext i1 %cmp7 to i8 + br label %return + +if.end: ; preds = %entry + %inc = add nsw i32 %i1, 1 + %inc10 = add nsw i32 %i2, 1 + %idxprom11 = sext i32 %inc to i64 + %arrayidx12 = getelementptr inbounds i8* %tmp, i64 %idxprom11 + %tmp3 = load i8* %arrayidx12, align 1 + %idxprom13 = sext i32 %inc10 to i64 + %arrayidx14 = getelementptr inbounds i8* %tmp, i64 %idxprom13 + %tmp4 = load i8* %arrayidx14, align 1 + %cmp17 = icmp eq i8 %tmp3, %tmp4 + br i1 %cmp17, label %if.end25, label %if.then19 + +if.then19: ; preds = %if.end + %cmp22 = icmp ugt i8 %tmp3, %tmp4 + %conv24 = zext i1 %cmp22 to i8 + br label %return + +if.end25: ; preds = %if.end + %inc26 = add nsw i32 %i1, 2 + %inc27 = add nsw i32 %i2, 2 + %idxprom28 = sext i32 %inc26 to i64 + %arrayidx29 = getelementptr inbounds i8* %tmp, i64 %idxprom28 + %tmp5 = load i8* %arrayidx29, align 1 + %idxprom30 = sext i32 %inc27 to i64 + %arrayidx31 = getelementptr inbounds i8* %tmp, i64 %idxprom30 + %tmp6 = load i8* %arrayidx31, align 1 + %cmp34 = icmp eq i8 %tmp5, %tmp6 + br i1 %cmp34, label %return, label %if.then36 + +if.then36: ; preds = %if.end25 + %cmp39 = icmp ugt i8 %tmp5, %tmp6 + %conv41 = zext i1 %cmp39 to i8 + br label %return + +return: ; preds = %if.then36, %if.end25, %if.then19, %if.then + %retval.0 = phi i8 [ %conv9, %if.then ], [ %conv24, %if.then19 ], [ %conv41, %if.then36 ], [ 0, %if.end25 ] + ret i8 %retval.0 +} diff --git a/test/CodeGen/ARM64/addrmode.ll b/test/CodeGen/ARM64/addrmode.ll new file mode 100644 index 0000000000..e1312376e2 --- /dev/null +++ b/test/CodeGen/ARM64/addrmode.ll @@ -0,0 +1,72 @@ +; RUN: llc -march=arm64 < %s | FileCheck %s +; rdar://10232252 + +@object = external hidden global i64, section "__DATA, __objc_ivar", align 8 + +; base + offset (imm9) +; CHECK: @t1 +; CHECK: ldr xzr, [x{{[0-9]+}}, #8] +; CHECK: ret +define void @t1() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 1 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + offset (> imm9) +; CHECK: @t2 +; 
CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264 +; CHECK: ldr xzr, [ +; CHECK: [[ADDREG]]] +; CHECK: ret +define void @t2() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 -33 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes) +; CHECK: @t3 +; CHECK: ldr xzr, [x{{[0-9]+}}, #32760] +; CHECK: ret +define void @t3() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 4095 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + unsigned offset (> imm12 * size of type in bytes) +; CHECK: @t4 +; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #32768 +; CHECK: ldr xzr, [ +; CHECK: [[ADDREG]]] +; CHECK: ret +define void @t4() { + %incdec.ptr = getelementptr inbounds i64* @object, i64 4096 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + reg +; CHECK: @t5 +; CHECK: ldr xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3] +; CHECK: ret +define void @t5(i64 %a) { + %incdec.ptr = getelementptr inbounds i64* @object, i64 %a + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} + +; base + reg + imm +; CHECK: @t6 +; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3 +; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #32768 +; CHECK: ldr xzr, [ +; CHECK: [[ADDREG]]] +; CHECK: ret +define void @t6(i64 %a) { + %tmp1 = getelementptr inbounds i64* @object, i64 %a + %incdec.ptr = getelementptr inbounds i64* %tmp1, i64 4096 + %tmp = load volatile i64* %incdec.ptr, align 8 + ret void +} diff --git a/test/CodeGen/ARM64/alloc-no-stack-realign.ll b/test/CodeGen/ARM64/alloc-no-stack-realign.ll new file mode 100644 index 0000000000..f396bc9917 --- /dev/null +++ b/test/CodeGen/ARM64/alloc-no-stack-realign.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=false | FileCheck %s + +; rdar://12713765 +; Make sure we are not creating stack objects that are assumed to be 64-byte +; aligned. 
+@T3_retval = common global <16 x float> zeroinitializer, align 16 + +define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp { +entry: +; CHECK: test +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp, #32] +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], [sp] +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE:x[0-9]+]], #32] +; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE]]] + %retval = alloca <16 x float>, align 16 + %0 = load <16 x float>* @T3_retval, align 16 + store <16 x float> %0, <16 x float>* %retval + %1 = load <16 x float>* %retval + store <16 x float> %1, <16 x float>* %agg.result, align 16 + ret void +} diff --git a/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll b/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll new file mode 100644 index 0000000000..3750f31b37 --- /dev/null +++ b/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s + +; CHECK: foo +; CHECK: ldr w[[REG:[0-9]+]], [x19, #264] +; CHECK: str w[[REG]], [x19, #132] +; CHECK: ldr w{{[0-9]+}}, [x19, #264] + +define i32 @foo(i32 %a) nounwind { + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %arr = alloca [32 x i32], align 4 + %i = alloca i32, align 4 + %arr2 = alloca [32 x i32], align 4 + %j = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %tmp = load i32* %a.addr, align 4 + %tmp1 = zext i32 %tmp to i64 + %v = mul i64 4, %tmp1 + %vla = alloca i8, i64 %v, align 4 + %tmp2 = bitcast i8* %vla to i32* + %tmp3 = load i32* %a.addr, align 4 + store i32 %tmp3, i32* %i, align 4 + %tmp4 = load i32* %a.addr, align 4 + store i32 %tmp4, i32* %j, align 4 + %tmp5 = load i32* %j, align 4 + store i32 %tmp5, i32* %retval + %x = load i32* %retval + ret i32 %x +} diff --git a/test/CodeGen/ARM64/andCmpBrToTBZ.ll b/test/CodeGen/ARM64/andCmpBrToTBZ.ll new file mode 100644 index 0000000000..419497722f --- /dev/null +++ b/test/CodeGen/ARM64/andCmpBrToTBZ.ll @@ -0,0 +1,72 @@ +; RUN: llc -O1 -march=arm64 -enable-andcmp-sinking=true < %s | FileCheck %s +; ModuleID = 'and-cbz-extr-mr.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +define zeroext i1 @foo(i1 %IsEditable, i1 %isTextField, i8* %str1, i8* %str2, i8* %str3, i8* %str4, i8* %str5, i8* %str6, i8* %str7, i8* %str8, i8* %str9, i8* %str10, i8* %str11, i8* %str12, i8* %str13, i32 %int1, i8* %str14) unnamed_addr #0 align 2 { +; CHECK: _foo: +entry: + %tobool = icmp eq i8* %str14, null + br i1 %tobool, label %return, label %if.end + +; CHECK: %if.end +; CHECK: tbz +if.end: ; preds = %entry + %and.i.i.i = and i32 %int1, 4 + %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0 + br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i + +land.rhs.i: ; preds = %if.end + %cmp.i.i.i = icmp eq i8* %str12, %str13 + br i1 %cmp.i.i.i, label %if.then3, label %lor.rhs.i.i.i + +lor.rhs.i.i.i: ; preds = %land.rhs.i + %cmp.i13.i.i.i = icmp eq i8* %str10, %str11 + br i1 %cmp.i13.i.i.i, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, label %if.end5 + +_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit: ; preds = %lor.rhs.i.i.i + %cmp.i.i.i.i = icmp eq i8* %str8, %str9 + br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5 + +if.then3: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i + %tmp11 = load i8* %str14, align 8 + %tmp12 = and i8 %tmp11, 2 + %tmp13 = icmp ne i8 %tmp12, 0 + br label %return 
+ +if.end5: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i +; CHECK: %if.end5 +; CHECK: tbz + br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19 + +land.rhs.i19: ; preds = %if.end5 + %cmp.i.i.i18 = icmp eq i8* %str6, %str7 + br i1 %cmp.i.i.i18, label %if.then7, label %lor.rhs.i.i.i23 + +lor.rhs.i.i.i23: ; preds = %land.rhs.i19 + %cmp.i13.i.i.i22 = icmp eq i8* %str3, %str4 + br i1 %cmp.i13.i.i.i22, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, label %if.end12 + +_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28: ; preds = %lor.rhs.i.i.i23 + %cmp.i.i.i.i26 = icmp eq i8* %str1, %str2 + br i1 %cmp.i.i.i.i26, label %if.then7, label %if.end12 + +if.then7: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %land.rhs.i19 + br i1 %isTextField, label %if.then9, label %if.end12 + +if.then9: ; preds = %if.then7 + %tmp23 = load i8* %str5, align 8 + %tmp24 = and i8 %tmp23, 2 + %tmp25 = icmp ne i8 %tmp24, 0 + br label %return + +if.end12: ; preds = %if.then7, %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %lor.rhs.i.i.i23, %if.end5, %if.end + %lnot = xor i1 %IsEditable, true + br label %return + +return: ; preds = %if.end12, %if.then9, %if.then3, %entry + %retval.0 = phi i1 [ %tmp13, %if.then3 ], [ %tmp25, %if.then9 ], [ %lnot, %if.end12 ], [ true, %entry ] + ret i1 %retval.0 +} + +attributes #0 = { nounwind ssp } diff --git a/test/CodeGen/ARM64/anyregcc-crash.ll b/test/CodeGen/ARM64/anyregcc-crash.ll new file mode 100644 index 0000000000..241cf974c0 --- /dev/null +++ b/test/CodeGen/ARM64/anyregcc-crash.ll @@ -0,0 +1,19 @@ +; RUN: not llc < %s -mtriple=arm64-apple-darwin 2>&1 | FileCheck %s +; +; Check that misuse of anyregcc results in a compile time error. + +; CHECK: LLVM ERROR: ran out of registers during register allocation +define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8, + i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16, + i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24, + i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) { +entry: + %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32, + i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8, + i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16, + i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24, + i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) + ret i64 %result +} + +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) 
diff --git a/test/CodeGen/ARM64/anyregcc.ll b/test/CodeGen/ARM64/anyregcc.ll new file mode 100644 index 0000000000..9e22c5ae18 --- /dev/null +++ b/test/CodeGen/ARM64/anyregcc.ll @@ -0,0 +1,358 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s + +; Stackmap Header: no constants - 6 callsites +; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; Header +; CHECK-NEXT: .long 0 +; Num Functions +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long _test +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long _property_access1 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long _property_access2 +; CHECK-NEXT: .long 32 +; CHECK-NEXT: .long _property_access3 +; CHECK-NEXT: .long 32 +; CHECK-NEXT: .long _anyreg_test1 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long _anyreg_test2 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long _patchpoint_spilldef +; CHECK-NEXT: .long 112 +; CHECK-NEXT: .long _patchpoint_spillargs +; CHECK-NEXT: .long 128 +; Num Constants +; CHECK-NEXT: .long 0 +; Num Callsites +; CHECK-NEXT: .long 8 + +; test +; CHECK-LABEL: .long L{{.*}}-_test +; CHECK-NEXT: .short 0 +; 3 locations +; CHECK-NEXT: .short 3 +; Loc 0: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Constant 3 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long 3 +define i64 @test() nounwind ssp uwtable { +entry: + call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1, i32 2, i64 3) + ret i64 0 +} + +; property access 1 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-LABEL: .long L{{.*}}-_property_access1 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access1(i8* %obj) nounwind ssp uwtable { +entry: + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj) + ret i64 %ret +} + +; property access 2 - %obj is an anyreg call argument and should therefore be in a register +; CHECK-LABEL: .long L{{.*}}-_property_access2 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @property_access2() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj) + ret i64 %ret +} + +; property access 3 - %obj is a frame index +; CHECK-LABEL: .long L{{.*}}-_property_access3 +; CHECK-NEXT: .short 0 +; 2 locations +; CHECK-NEXT: .short 2 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Direct FP - 8 +; 
CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 29 +; CHECK-NEXT: .long -8 +define i64 @property_access3() nounwind ssp uwtable { +entry: + %obj = alloca i64, align 8 + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj) + ret i64 %ret +} + +; anyreg_test1 +; CHECK-LABEL: .long L{{.*}}-_anyreg_test1 +; CHECK-NEXT: .short 0 +; 14 locations +; CHECK-NEXT: .short 14 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { +entry: + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) + ret i64 %ret +} + +; anyreg_test2 +; CHECK-LABEL: .long L{{.*}}-_anyreg_test2 +; CHECK-NEXT: .short 0 +; 14 locations +; CHECK-NEXT: .short 14 +; Loc 0: Register <-- this is the return register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 4: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 5: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: 
.short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 6: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 7: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 8: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 9: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 10: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 11: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 12: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 13: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { +entry: + %f = inttoptr i64 281474417671919 to i8* + %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) + ret i64 %ret +} + +; Test spilling the return value of an anyregcc call. +; +; [JS] Assertion: "Folded a def to a non-store!" +; +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spilldef +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 3 +; Loc 0: Register (some register that will be spilled to the stack) +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: + %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2) + tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind + ret i64 %result +} + +; Test spilling the arguments of an anyregcc call. 
+; +; [JS] AnyRegCC argument ends up being spilled +; +; CHECK-LABEL: .long L{{.*}}-_patchpoint_spillargs +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 5 +; Loc 0: Return a register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 1: Arg0 in a Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 2: Arg1 in a Register +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; Loc 3: Arg2 spilled to FP -96 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 29 +; CHECK-NEXT: .long -96 +; Loc 4: Arg3 spilled to FP - 88 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 29 +; CHECK-NEXT: .long -88 +define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { +entry: + tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind + %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4) + ret i64 %result +} + +declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) diff --git a/test/CodeGen/ARM64/arith-saturating.ll b/test/CodeGen/ARM64/arith-saturating.ll new file mode 100644 index 0000000000..437ebb8fe6 --- /dev/null +++ b/test/CodeGen/ARM64/arith-saturating.ll @@ -0,0 +1,153 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s + +define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qadds: +; CHECK: sqadd s0, s0, s1 + %vecext = extractelement <4 x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqadd.i +} + +define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qaddd: +; CHECK: sqadd d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqadd.i = tail call i64 @llvm.arm64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqadd.i +} + +define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqadds: +; CHECK: uqadd s0, s0, s1 + %vecext = extractelement <4 x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqadd.i = tail call i32 @llvm.arm64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqadd.i +} + +define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqaddd: +; CHECK: uqadd d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqadd.i = tail call i64 @llvm.arm64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqadd.i +} + +declare i64 @llvm.arm64.neon.uqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.uqadd.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone + +define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qsubs: +; CHECK: sqsub s0, s0, s1 + %vecext = 
extractelement <4 x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqsub.i = tail call i32 @llvm.arm64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqsub.i +} + +define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: qsubd: +; CHECK: sqsub d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqsub.i = tail call i64 @llvm.arm64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqsub.i +} + +define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqsubs: +; CHECK: uqsub s0, s0, s1 + %vecext = extractelement <4 x i32> %b, i32 0 + %vecext1 = extractelement <4 x i32> %c, i32 0 + %vqsub.i = tail call i32 @llvm.arm64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind + ret i32 %vqsub.i +} + +define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp { +; CHECK-LABEL: uqsubd: +; CHECK: uqsub d0, d0, d1 + %vecext = extractelement <2 x i64> %b, i32 0 + %vecext1 = extractelement <2 x i64> %c, i32 0 + %vqsub.i = tail call i64 @llvm.arm64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind + ret i64 %vqsub.i +} + +declare i64 @llvm.arm64.neon.uqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.uqsub.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) nounwind readnone + +define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone { +; CHECK-LABEL: qabss: +; CHECK: sqabs s0, s0 +; CHECK: ret + %vecext = extractelement <4 x i32> %b, i32 0 + %vqabs.i = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %vecext) nounwind + ret i32 %vqabs.i +} + +define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { +; CHECK-LABEL: qabsd: +; CHECK: sqabs d0, d0 +; CHECK: ret + %vecext = extractelement <2 x i64> %b, i32 0 + %vqabs.i = tail call i64 @llvm.arm64.neon.sqabs.i64(i64 %vecext) nounwind + ret i64 %vqabs.i +} + +define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone { +; CHECK-LABEL: qnegs: +; CHECK: sqneg s0, s0 +; CHECK: ret + %vecext = extractelement <4 x i32> %b, i32 0 + %vqneg.i = tail call i32 @llvm.arm64.neon.sqneg.i32(i32 %vecext) nounwind + ret i32 %vqneg.i +} + +define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone { +; CHECK-LABEL: qnegd: +; CHECK: sqneg d0, d0 +; CHECK: ret + %vecext = extractelement <2 x i64> %b, i32 0 + %vqneg.i = tail call i64 @llvm.arm64.neon.sqneg.i64(i64 %vecext) nounwind + ret i64 %vqneg.i +} + +declare i64 @llvm.arm64.neon.sqneg.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqneg.i32(i32) nounwind readnone +declare i64 @llvm.arm64.neon.sqabs.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.sqabs.i32(i32) nounwind readnone + + +define i32 @vqmovund(<2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vqmovund: +; CHECK: sqxtun s0, d0 + %vecext = extractelement <2 x i64> %b, i32 0 + %vqmovun.i = tail call i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind + ret i32 %vqmovun.i +} + +define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vqmovnd_s: +; CHECK: sqxtn s0, d0 + %vecext = extractelement <2 x i64> %b, i32 0 + %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind + ret i32 %vqmovn.i +} + +define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone { +; CHECK-LABEL: vqmovnd_u: +; CHECK: uqxtn s0, d0 + %vecext = extractelement <2 x i64> %b, i32 0 + %vqmovn.i = tail call 
i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind + ret i32 %vqmovn.i +} + +declare i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone +declare i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone diff --git a/test/CodeGen/ARM64/arith.ll b/test/CodeGen/ARM64/arith.ll new file mode 100644 index 0000000000..b6ff0da3b2 --- /dev/null +++ b/test/CodeGen/ARM64/arith.ll @@ -0,0 +1,262 @@ +; RUN: llc < %s -march=arm64 -asm-verbose=false | FileCheck %s + +define i32 @t1(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t1: +; CHECK: add w0, w1, w0 +; CHECK: ret + %add = add i32 %b, %a + ret i32 %add +} + +define i32 @t2(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t2: +; CHECK: udiv w0, w0, w1 +; CHECK: ret + %udiv = udiv i32 %a, %b + ret i32 %udiv +} + +define i64 @t3(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t3: +; CHECK: udiv x0, x0, x1 +; CHECK: ret + %udiv = udiv i64 %a, %b + ret i64 %udiv +} + +define i32 @t4(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t4: +; CHECK: sdiv w0, w0, w1 +; CHECK: ret + %sdiv = sdiv i32 %a, %b + ret i32 %sdiv +} + +define i64 @t5(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t5: +; CHECK: sdiv x0, x0, x1 +; CHECK: ret + %sdiv = sdiv i64 %a, %b + ret i64 %sdiv +} + +define i32 @t6(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t6: +; CHECK: lslv w0, w0, w1 +; CHECK: ret + %shl = shl i32 %a, %b + ret i32 %shl +} + +define i64 @t7(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t7: +; CHECK: lslv x0, x0, x1 +; CHECK: ret + %shl = shl i64 %a, %b + ret i64 %shl +} + +define i32 @t8(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t8: +; CHECK: lsrv w0, w0, w1 +; CHECK: ret + %lshr = lshr i32 %a, %b + ret i32 %lshr +} + +define i64 @t9(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t9: +; CHECK: lsrv x0, x0, x1 +; CHECK: ret + %lshr = lshr i64 %a, %b + ret i64 %lshr +} + +define i32 @t10(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t10: +; CHECK: asrv w0, w0, w1 +; CHECK: ret + %ashr = ashr i32 %a, %b + ret i32 %ashr +} + +define i64 @t11(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t11: +; CHECK: asrv x0, x0, x1 +; CHECK: ret + %ashr = ashr i64 %a, %b + ret i64 %ashr +} + +define i32 @t12(i16 %a, i32 %x) nounwind ssp { +entry: +; CHECK-LABEL: t12: +; CHECK: add w0, w1, w0, sxth +; CHECK: ret + %c = sext i16 %a to i32 + %e = add i32 %x, %c + ret i32 %e +} + +define i32 @t13(i16 %a, i32 %x) nounwind ssp { +entry: +; CHECK-LABEL: t13: +; CHECK: add w0, w1, w0, sxth #2 +; CHECK: ret + %c = sext i16 %a to i32 + %d = shl i32 %c, 2 + %e = add i32 %x, %d + ret i32 %e +} + +define i64 @t14(i16 %a, i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t14: +; CHECK: add x0, x1, w0, uxth #3 +; CHECK: ret + %c = zext i16 %a to i64 + %d = shl i64 %c, 3 + %e = add i64 %x, %d + ret i64 %e +} + +; rdar://9160598 +define i64 @t15(i64 %a, i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t15: +; CHECK: add x0, x1, w0, uxtw +; CHECK: ret + %b = and i64 %a, 4294967295 + %c = add i64 %x, %b + ret i64 %c +} + +define i64 @t16(i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t16: +; CHECK: lsl x0, x0, #1 +; CHECK: ret + %a = shl i64 %x, 1 + ret i64 %a +} + +; rdar://9166974 +define i64 @t17(i16 %a, i64 %x) nounwind ssp { +entry: +; CHECK-LABEL: t17: +; CHECK: sxth [[REG:x[0-9]+]], x0 +; CHECK: sub 
x0, xzr, [[REG]], lsl #32 +; CHECK: ret + %tmp16 = sext i16 %a to i64 + %tmp17 = mul i64 %tmp16, -4294967296 + ret i64 %tmp17 +} + +define i32 @t18(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t18: +; CHECK: sdiv w0, w0, w1 +; CHECK: ret + %sdiv = call i32 @llvm.arm64.sdiv.i32(i32 %a, i32 %b) + ret i32 %sdiv +} + +define i64 @t19(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t19: +; CHECK: sdiv x0, x0, x1 +; CHECK: ret + %sdiv = call i64 @llvm.arm64.sdiv.i64(i64 %a, i64 %b) + ret i64 %sdiv +} + +define i32 @t20(i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t20: +; CHECK: udiv w0, w0, w1 +; CHECK: ret + %udiv = call i32 @llvm.arm64.udiv.i32(i32 %a, i32 %b) + ret i32 %udiv +} + +define i64 @t21(i64 %a, i64 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: t21: +; CHECK: udiv x0, x0, x1 +; CHECK: ret + %udiv = call i64 @llvm.arm64.udiv.i64(i64 %a, i64 %b) + ret i64 %udiv +} + +declare i32 @llvm.arm64.sdiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.sdiv.i64(i64, i64) nounwind readnone +declare i32 @llvm.arm64.udiv.i32(i32, i32) nounwind readnone +declare i64 @llvm.arm64.udiv.i64(i64, i64) nounwind readnone + +; 32-bit not. +define i32 @inv_32(i32 %x) nounwind ssp { +entry: +; CHECK: inv_32 +; CHECK: mvn w0, w0 +; CHECK: ret + %inv = xor i32 %x, -1 + ret i32 %inv +} + +; 64-bit not. +define i64 @inv_64(i64 %x) nounwind ssp { +entry: +; CHECK: inv_64 +; CHECK: mvn x0, x0 +; CHECK: ret + %inv = xor i64 %x, -1 + ret i64 %inv +} + +; Multiplying by a power of two plus or minus one is better done via shift +; and add/sub rather than the madd/msub instructions. The latter are 4+ cycles, +; and the former are two (total for the two instruction sequence for subtract). +define i32 @f0(i32 %a) nounwind readnone ssp { +; CHECK-LABEL: f0: +; CHECK-NEXT: add w0, w0, w0, lsl #3 +; CHECK-NEXT: ret + %res = mul i32 %a, 9 + ret i32 %res +} + +define i64 @f1(i64 %a) nounwind readnone ssp { +; CHECK-LABEL: f1: +; CHECK-NEXT: lsl x8, x0, #4 +; CHECK-NEXT: sub x0, x8, x0 +; CHECK-NEXT: ret + %res = mul i64 %a, 15 + ret i64 %res +} + +define i32 @f2(i32 %a) nounwind readnone ssp { +; CHECK-LABEL: f2: +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: sub w0, w8, w0 +; CHECK-NEXT: ret + %res = mul nsw i32 %a, 7 + ret i32 %res +} + +define i64 @f3(i64 %a) nounwind readnone ssp { +; CHECK-LABEL: f3: +; CHECK-NEXT: add x0, x0, x0, lsl #4 +; CHECK-NEXT: ret + %res = mul nsw i64 %a, 17 + ret i64 %res +} diff --git a/test/CodeGen/ARM64/atomic-128.ll b/test/CodeGen/ARM64/atomic-128.ll new file mode 100644 index 0000000000..a0039a3237 --- /dev/null +++ b/test/CodeGen/ARM64/atomic-128.ll @@ -0,0 +1,213 @@ +; RUN: llc < %s -march=arm64 -mtriple=arm64-linux-gnu -verify-machineinstrs | FileCheck %s + +@var = global i128 0 + +define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +; CHECK-LABEL: val_compare_and_swap: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x0] +; CHECK: cmp [[RESULTLO]], x2 +; CHECK: sbc xzr, [[RESULTHI]], x3 +; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] +; CHECK: [[LABEL2]]: + %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + ret i128 %val +} + +define void @fetch_and_nand(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_nand: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: bic 
[[SCRATCH_REGLO:x[0-9]+]], x2, [[DEST_REGLO]] +; CHECK: bic [[SCRATCH_REGHI:x[0-9]+]], x3, [[DEST_REGHI]] +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw nand i128* %p, i128 %bits release + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_or(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_or: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: orr [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 +; CHECK: orr [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw or i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_add(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_add: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: adds [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 +; CHECK: adc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw add i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_sub(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_sub: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: subs [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2 +; CHECK: sbc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3 +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw sub i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_min(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_min: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, lt +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, lt +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw min i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_max(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_max: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, gt +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, gt +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw max i128* %p, i128 %bits seq_cst + store i128 %val, 
i128* @var, align 16 + ret void +} + +define void @fetch_and_umin(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_umin: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, cc +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, cc +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw umin i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define void @fetch_and_umax(i128* %p, i128 %bits) { +; CHECK-LABEL: fetch_and_umax: +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0] +; CHECK: cmp [[DEST_REGLO]], x2 +; CHECK: sbc xzr, [[DEST_REGHI]], x3 +; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, hi +; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, hi +; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0] +; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]] + +; CHECK: str [[DEST_REGHI]] +; CHECK: str [[DEST_REGLO]] + %val = atomicrmw umax i128* %p, i128 %bits seq_cst + store i128 %val, i128* @var, align 16 + ret void +} + +define i128 @atomic_load_seq_cst(i128* %p) { +; CHECK-LABEL: atomic_load_seq_cst: +; CHECK-NOT: dmb +; CHECK-LABEL: ldaxp +; CHECK-NOT: dmb + %r = load atomic i128* %p seq_cst, align 16 + ret i128 %r +} + +define i128 @atomic_load_relaxed(i128* %p) { +; CHECK-LABEL: atomic_load_relaxed: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] +; CHECK: orr [[SAMELO:x[0-9]+]], [[LO]], xzr +; CHECK: orr [[SAMEHI:x[0-9]+]], [[HI]], xzr +; CHECK: stxp [[SUCCESS:w[0-9]+]], [[SAMELO]], [[SAMEHI]], [x0] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + %r = load atomic i128* %p monotonic, align 16 + ret i128 %r +} + + +define void @atomic_store_seq_cst(i128 %in, i128* %p) { +; CHECK-LABEL: atomic_store_seq_cst: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxp xzr, xzr, [x2] +; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + store atomic i128 %in, i128* %p seq_cst, align 16 + ret void +} + +define void @atomic_store_release(i128 %in, i128* %p) { +; CHECK-LABEL: atomic_store_release: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp xzr, xzr, [x2] +; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + store atomic i128 %in, i128* %p release, align 16 + ret void +} + +define void @atomic_store_relaxed(i128 %in, i128* %p) { +; CHECK-LABEL: atomic_store_relaxed: +; CHECK-NOT: dmb +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp xzr, xzr, [x2] +; CHECK: stxp [[SUCCESS:w[0-9]+]], x0, x1, [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] +; CHECK-NOT: dmb + store atomic i128 %in, i128* %p unordered, align 16 + ret void +} diff --git a/test/CodeGen/ARM64/atomic.ll b/test/CodeGen/ARM64/atomic.ll new file mode 100644 index 0000000000..cf8cf7d7d9 --- /dev/null +++ b/test/CodeGen/ARM64/atomic.ll @@ -0,0 +1,343 @@ +; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s + +define i32 @val_compare_and_swap(i32* %p) { +; CHECK-LABEL: val_compare_and_swap: +; CHECK: orr [[NEWVAL_REG:w[0-9]+]], wzr, 
#0x4 +; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK: cmp [[RESULT]], [[OLDVAL_REG]] +; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: [[LABEL2]]: + %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire + ret i32 %val +} + +define i64 @val_compare_and_swap_64(i64* %p) { +; CHECK-LABEL: val_compare_and_swap_64: +; CHECK: orr [[NEWVAL_REG:x[0-9]+]], xzr, #0x4 +; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxr [[RESULT:x[0-9]+]], [x0] +; CHECK: cmp [[RESULT]], [[OLDVAL_REG]] +; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NOT: stxr [[NEWVAL_REG]], [[NEWVAL_REG]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: [[LABEL2]]: + %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic + ret i64 %val +} + +define i32 @fetch_and_nand(i32* %p) { +; CHECK-LABEL: fetch_and_nand: +; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0] +; CHECK: bic [[SCRATCH2_REG:w[0-9]+]], [[OLDVAL_REG]], w[[DEST_REG]] +; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, x[[DEST_REG]] + %val = atomicrmw nand i32* %p, i32 7 release + ret i32 %val +} + +define i64 @fetch_and_nand_64(i64* %p) { +; CHECK-LABEL: fetch_and_nand_64: +; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x0] +; CHECK: bic [[SCRATCH2_REG:x[0-9]+]], [[OLDVAL_REG]], [[DEST_REG]] +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, [[DEST_REG]] + %val = atomicrmw nand i64* %p, i64 7 acq_rel + ret i64 %val +} + +define i32 @fetch_and_or(i32* %p) { +; CHECK-LABEL: fetch_and_or: +; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #5 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0] +; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]] +; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] +; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, x[[DEST_REG]] + %val = atomicrmw or i32* %p, i32 5 seq_cst + ret i32 %val +} + +define i64 @fetch_and_or_64(i64* %p) { +; CHECK: fetch_and_or_64: +; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7 +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x0] +; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], [[OLDVAL_REG]] +; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] +; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: mov x0, [[DEST_REG]] + %val = atomicrmw or i64* %p, i64 7 monotonic + ret i64 %val +} + +define void @acquire_fence() { + fence acquire + ret void + ; CHECK-LABEL: acquire_fence: + ; CHECK: dmb ishld +} + +define void @release_fence() { + fence release + ret void + ; CHECK-LABEL: release_fence: + ; CHECK: dmb ish{{$}} +} + +define void @seq_cst_fence() { + fence seq_cst + ret void + ; CHECK-LABEL: seq_cst_fence: + ; CHECK: dmb ish{{$}} +} + +define i32 @atomic_load(i32* %p) { + %r = load atomic i32* %p seq_cst, align 4 + ret i32 %r + ; CHECK-LABEL: atomic_load: + ; CHECK: ldar +} + +define i8 
@atomic_load_relaxed_8(i8* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_8: + %ptr_unsigned = getelementptr i8* %p, i32 4095 + %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1 +; CHECK: ldrb {{w[0-9]+}}, [x0, #4095] + + %ptr_regoff = getelementptr i8* %p, i32 %off32 + %val_regoff = load atomic i8* %ptr_regoff unordered, align 1 + %tot1 = add i8 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldrb {{w[0-9]+}}, [x0, x1, sxtw] + + %ptr_unscaled = getelementptr i8* %p, i32 -256 + %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1 + %tot2 = add i8 %tot1, %val_unscaled +; CHECK: ldurb {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + %val_random = load atomic i8* %ptr_random unordered, align 1 + %tot3 = add i8 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]] + + ret i8 %tot3 +} + +define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_16: + %ptr_unsigned = getelementptr i16* %p, i32 4095 + %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2 +; CHECK: ldrh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr i16* %p, i32 %off32 + %val_regoff = load atomic i16* %ptr_regoff unordered, align 2 + %tot1 = add i16 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldrh {{w[0-9]+}}, [x0, x1, sxtw #1] + + %ptr_unscaled = getelementptr i16* %p, i32 -128 + %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2 + %tot2 = add i16 %tot1, %val_unscaled +; CHECK: ldurh {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) + %val_random = load atomic i16* %ptr_random unordered, align 2 + %tot3 = add i16 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]] + + ret i16 %tot3 +} + +define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_32: + %ptr_unsigned = getelementptr i32* %p, i32 4095 + %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4 +; CHECK: ldr {{w[0-9]+}}, [x0, #16380] + + %ptr_regoff = getelementptr i32* %p, i32 %off32 + %val_regoff = load atomic i32* %ptr_regoff unordered, align 4 + %tot1 = add i32 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldr {{w[0-9]+}}, [x0, x1, sxtw #2] + + %ptr_unscaled = getelementptr i32* %p, i32 -64 + %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4 + %tot2 = add i32 %tot1, %val_unscaled +; CHECK: ldur {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. 
ADD imm) + %val_random = load atomic i32* %ptr_random unordered, align 4 + %tot3 = add i32 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]] + + ret i32 %tot3 +} + +define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) { +; CHECK-LABEL: atomic_load_relaxed_64: + %ptr_unsigned = getelementptr i64* %p, i32 4095 + %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8 +; CHECK: ldr {{x[0-9]+}}, [x0, #32760] + + %ptr_regoff = getelementptr i64* %p, i32 %off32 + %val_regoff = load atomic i64* %ptr_regoff unordered, align 8 + %tot1 = add i64 %val_unsigned, %val_regoff + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: ldr {{x[0-9]+}}, [x0, x1, sxtw #3] + + %ptr_unscaled = getelementptr i64* %p, i32 -32 + %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8 + %tot2 = add i64 %tot1, %val_unscaled +; CHECK: ldur {{x[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) + %val_random = load atomic i64* %ptr_random unordered, align 8 + %tot3 = add i64 %tot2, %val_random +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]] + + ret i64 %tot3 +} + + +define void @atomc_store(i32* %p) { + store atomic i32 4, i32* %p seq_cst, align 4 + ret void + ; CHECK-LABEL: atomc_store: + ; CHECK: stlr +} + +define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) { +; CHECK-LABEL: atomic_store_relaxed_8: + %ptr_unsigned = getelementptr i8* %p, i32 4095 + store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1 +; CHECK: strb {{w[0-9]+}}, [x0, #4095] + + %ptr_regoff = getelementptr i8* %p, i32 %off32 + store atomic i8 %val, i8* %ptr_regoff unordered, align 1 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: strb {{w[0-9]+}}, [x0, x1, sxtw] + + %ptr_unscaled = getelementptr i8* %p, i32 -256 + store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1 +; CHECK: sturb {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + store atomic i8 %val, i8* %ptr_random unordered, align 1 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: strb {{w[0-9]+}}, [x[[ADDR]]] + + ret void +} + +define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) { +; CHECK-LABEL: atomic_store_relaxed_16: + %ptr_unsigned = getelementptr i16* %p, i32 4095 + store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2 +; CHECK: strh {{w[0-9]+}}, [x0, #8190] + + %ptr_regoff = getelementptr i16* %p, i32 %off32 + store atomic i16 %val, i16* %ptr_regoff unordered, align 2 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: strh {{w[0-9]+}}, [x0, x1, sxtw #1] + + %ptr_unscaled = getelementptr i16* %p, i32 -128 + store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2 +; CHECK: sturh {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. 
ADD imm) + store atomic i16 %val, i16* %ptr_random unordered, align 2 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: strh {{w[0-9]+}}, [x[[ADDR]]] + + ret void +} + +define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) { +; CHECK-LABEL: atomic_store_relaxed_32: + %ptr_unsigned = getelementptr i32* %p, i32 4095 + store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4 +; CHECK: str {{w[0-9]+}}, [x0, #16380] + + %ptr_regoff = getelementptr i32* %p, i32 %off32 + store atomic i32 %val, i32* %ptr_regoff unordered, align 4 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: str {{w[0-9]+}}, [x0, x1, sxtw #2] + + %ptr_unscaled = getelementptr i32* %p, i32 -64 + store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4 +; CHECK: stur {{w[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) + store atomic i32 %val, i32* %ptr_random unordered, align 4 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: str {{w[0-9]+}}, [x[[ADDR]]] + + ret void +} + +define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) { +; CHECK-LABEL: atomic_store_relaxed_64: + %ptr_unsigned = getelementptr i64* %p, i32 4095 + store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8 +; CHECK: str {{x[0-9]+}}, [x0, #32760] + + %ptr_regoff = getelementptr i64* %p, i32 %off32 + store atomic i64 %val, i64* %ptr_regoff unordered, align 8 + ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg. +; CHECK: str {{x[0-9]+}}, [x0, x1, sxtw #3] + + %ptr_unscaled = getelementptr i64* %p, i32 -32 + store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8 +; CHECK: stur {{x[0-9]+}}, [x0, #-256] + + %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) + store atomic i64 %val, i64* %ptr_random unordered, align 8 +; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936 +; CHECK: str {{x[0-9]+}}, [x[[ADDR]]] + + ret void +} + +; rdar://11531169 +; rdar://11531308 + +%"class.X::Atomic" = type { %struct.x_atomic_t } +%struct.x_atomic_t = type { i32 } + +@counter = external hidden global %"class.X::Atomic", align 4 + +define i32 @next_id() nounwind optsize ssp align 2 { +entry: + %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst + %add.i = add i32 %0, 1 + %tobool = icmp eq i32 %add.i, 0 + br i1 %tobool, label %if.else, label %return + +if.else: ; preds = %entry + %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst + %add.i2 = add i32 %1, 1 + br label %return + +return: ; preds = %if.else, %entry + %retval.0 = phi i32 [ %add.i2, %if.else ], [ %add.i, %entry ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/ARM64/big-imm-offsets.ll b/test/CodeGen/ARM64/big-imm-offsets.ll new file mode 100644 index 0000000000..a56df07a49 --- /dev/null +++ b/test/CodeGen/ARM64/big-imm-offsets.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=arm64 < %s + + +; Make sure large offsets aren't mistaken for valid immediate offsets. 
+; +define void @f(i32* nocapture %p) { +entry: + %a = ptrtoint i32* %p to i64 + %ao = add i64 %a, 25769803792 + %b = inttoptr i64 %ao to i32* + store volatile i32 0, i32* %b, align 4 + store volatile i32 0, i32* %b, align 4 + ret void +} diff --git a/test/CodeGen/ARM64/big-stack.ll b/test/CodeGen/ARM64/big-stack.ll new file mode 100644 index 0000000000..56ca30c17b --- /dev/null +++ b/test/CodeGen/ARM64/big-stack.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s | FileCheck %s +target triple = "arm64-apple-macosx10" + +; Check that big stacks are generated correctly. +; Currently, this is done by a sequence of sub instructions, +; which can encode immediate with a 12 bits mask an optionally +; shift left (up to 12). I.e., 16773120 is the biggest value. +; +; CHECK-LABEL: foo: +; CHECK: sub sp, sp, #16773120 +; CHECK: sub sp, sp, #16773120 +; CHECK: sub sp, sp, #8192 +define void @foo() nounwind ssp { +entry: + %buffer = alloca [33554432 x i8], align 1 + %arraydecay = getelementptr inbounds [33554432 x i8]* %buffer, i64 0, i64 0 + call void @doit(i8* %arraydecay) nounwind + ret void +} + +declare void @doit(i8*) diff --git a/test/CodeGen/ARM64/bitfield-extract.ll b/test/CodeGen/ARM64/bitfield-extract.ll new file mode 100644 index 0000000000..96b6967a97 --- /dev/null +++ b/test/CodeGen/ARM64/bitfield-extract.ll @@ -0,0 +1,406 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s +%struct.X = type { i8, i8, [2 x i8] } +%struct.Y = type { i32, i8 } +%struct.Z = type { i8, i8, [2 x i8], i16 } +%struct.A = type { i64, i8 } + +define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind optsize ssp { +; CHECK-LABEL: foo: +; CHECK: ubfm +; CHECK-NOT: and +; CHECK: ret + + %tmp = bitcast %struct.X* %x to i32* + %tmp1 = load i32* %tmp, align 4 + %b = getelementptr inbounds %struct.Y* %y, i64 0, i32 1 + %bf.clear = lshr i32 %tmp1, 3 + %bf.clear.lobit = and i32 %bf.clear, 1 + %frombool = trunc i32 %bf.clear.lobit to i8 + store i8 %frombool, i8* %b, align 1 + ret void +} + +define i32 @baz(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: baz: +; CHECK: sbfm w0, w0, #0, #3 + %tmp = trunc i64 %cav1.coerce to i32 + %tmp1 = shl i32 %tmp, 28 + %bf.val.sext = ashr exact i32 %tmp1, 28 + ret i32 %bf.val.sext +} + +define i32 @bar(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: bar: +; CHECK: sbfm w0, w0, #4, #9 + %tmp = trunc i64 %cav1.coerce to i32 + %cav1.sroa.0.1.insert = shl i32 %tmp, 22 + %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26 + ret i32 %tmp1 +} + +define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp { +; CHECK-LABEL: fct1: +; CHECK: ubfm +; CHECK-NOT: and +; CHECK: ret + + %tmp = bitcast %struct.Z* %x to i64* + %tmp1 = load i64* %tmp, align 4 + %b = getelementptr inbounds %struct.A* %y, i64 0, i32 0 + %bf.clear = lshr i64 %tmp1, 3 + %bf.clear.lobit = and i64 %bf.clear, 1 + store i64 %bf.clear.lobit, i64* %b, align 8 + ret void +} + +define i64 @fct2(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: fct2: +; CHECK: sbfm x0, x0, #0, #35 + %tmp = shl i64 %cav1.coerce, 28 + %bf.val.sext = ashr exact i64 %tmp, 28 + ret i64 %bf.val.sext +} + +define i64 @fct3(i64 %cav1.coerce) nounwind { +; CHECK-LABEL: fct3: +; CHECK: sbfm x0, x0, #4, #41 + %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22 + %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26 + ret i64 %tmp1 +} + +define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct4: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #39 +; CHECK-NEXT: str [[REG1]], +; CHECK-NEXT: ret + %0 
= load i64* %y, align 8 + %and = and i64 %0, -16777216 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 16777215 + %or = or i64 %and, %and1 + store i64 %or, i64* %y, align 8 + ret void +} + +define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct5: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; CHECK-NEXT: str [[REG1]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + store i32 %or, i32* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some low bits +define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct6: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shr1 = lshr i32 %or, 2 + store i32 %shr1, i32* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct7: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + store i32 %shl, i32* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some low bits +; (i64 version) +define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct8: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shr1 = lshr i64 %or, 2 + store i64 %shr1, i64* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +; (i64 version) +define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct9: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + store i64 %shl, i64* %y, align 8 + ret void +} + +; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr) +; (i32 version) +define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct10: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #0, #2 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %and1 = and i32 %x, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + store i32 %shl, i32* 
%y, align 8 + ret void +} + +; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr) +; (i64 version) +define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct11: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #0, #2 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %and1 = and i64 %x, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + store i64 %shl, i64* %y, align 8 + ret void +} + +define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 { +; CHECK-LABEL: fct12bis: +; CHECK-NOT: and +; CHECK: ubfm w0, w0, #11, #11 + %and.i.i = and i32 %tmp2, 2048 + %tobool.i.i = icmp ne i32 %and.i.i, 0 + ret i1 %tobool.i.i +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct12: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG2:w[0-9]+]], [[REG1]], #2, #29 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -8 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + %shr2 = lshr i32 %shl, 4 + store i32 %shr2, i32* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +; (i64 version) +define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct13: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG2:x[0-9]+]], [[REG1]], #2, #61 +; CHECK-NEXT: str [[REG2]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -8 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + %shr2 = lshr i64 %shl, 4 + store i64 %shr2, i64* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct14: +; CHECK: ldr [[REG1:w[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], w1, #16, #23 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4 +; CHECK-NEXT: bfm [[REG2]], w2, #5, #7 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2 +; CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, -256 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 255 + %or = or i32 %and, %and1 + %shl = lshr i32 %or, 4 + %and2 = and i32 %shl, -8 + %shr1 = lshr i32 %x1, 5 + %and3 = and i32 %shr1, 7 + %or1 = or i32 %and2, %and3 + %shl1 = shl i32 %or1, 2 + store i32 %shl1, i32* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits +; (i64 version) +define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct15: +; CHECK: ldr [[REG1:x[0-9]+]], +; CHECK-NEXT: bfm [[REG1]], x1, #16, #23 +; lsr is an alias of ubfm +; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4 +; CHECK-NEXT: bfm [[REG2]], x2, #5, #7 +; lsl is an alias of ubfm +; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2 +; 
CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, -256 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 255 + %or = or i64 %and, %and1 + %shl = lshr i64 %or, 4 + %and2 = and i64 %shl, -8 + %shr1 = lshr i64 %x1, 5 + %and3 = and i64 %shr1, 7 + %or1 = or i64 %and2, %and3 + %shl1 = shl i64 %or1, 2 + store i64 %shl1, i64* %y, align 8 + ret void +} + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits and a masking operation has to be kept +define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct16: +; CHECK: ldr [[REG1:w[0-9]+]], +; Create the constant +; CHECK: movz [[REGCST:w[0-9]+]], #26, lsl #16 +; CHECK: movk [[REGCST]], #33120 +; Do the masking +; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]] +; CHECK-NEXT: bfm [[REG2]], w1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG3:w[0-9]+]], [[REG2]], #2, #29 +; CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i32* %y, align 8 + %and = and i32 %0, 1737056 + %shr = lshr i32 %x, 16 + %and1 = and i32 %shr, 7 + %or = or i32 %and, %and1 + %shl = shl i32 %or, 2 + %shr2 = lshr i32 %shl, 4 + store i32 %shr2, i32* %y, align 8 + ret void +} + + +; Check if we can still catch bfm instruction when we drop some high bits +; and some low bits and a masking operation has to be kept +; (i64 version) +define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp { +entry: +; CHECK-LABEL: fct17: +; CHECK: ldr [[REG1:x[0-9]+]], +; Create the constant +; CHECK: movz [[REGCST:x[0-9]+]], #26, lsl #16 +; CHECK: movk [[REGCST]], #33120 +; Do the masking +; CHECK: and [[REG2:x[0-9]+]], [[REG1]], [[REGCST]] +; CHECK-NEXT: bfm [[REG2]], x1, #16, #18 +; lsr is an alias of ubfm +; CHECK-NEXT: ubfm [[REG3:x[0-9]+]], [[REG2]], #2, #61 +; CHECK-NEXT: str [[REG3]], +; CHECK-NEXT: ret + %0 = load i64* %y, align 8 + %and = and i64 %0, 1737056 + %shr = lshr i64 %x, 16 + %and1 = and i64 %shr, 7 + %or = or i64 %and, %and1 + %shl = shl i64 %or, 2 + %shr2 = lshr i64 %shl, 4 + store i64 %shr2, i64* %y, align 8 + ret void +} + +define i64 @fct18(i32 %xor72) nounwind ssp { +; CHECK-LABEL: fct18: +; CHECK: ubfm x0, x0, #9, #16 + %shr81 = lshr i32 %xor72, 9 + %conv82 = zext i32 %shr81 to i64 + %result = and i64 %conv82, 255 + ret i64 %result +} diff --git a/test/CodeGen/ARM64/blockaddress.ll b/test/CodeGen/ARM64/blockaddress.ll new file mode 100644 index 0000000000..ac4f19e65d --- /dev/null +++ b/test/CodeGen/ARM64/blockaddress.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc < %s -mtriple=arm64-linux-gnu -code-model=large| FileCheck %s --check-prefix=CHECK-LARGE + +; rdar://9188695 + +define i64 @t() nounwind ssp { +entry: +; CHECK-LABEL: t: +; CHECK: adrp [[REG:x[0-9]+]], Ltmp1@PAGE +; CHECK: add {{x[0-9]+}}, [[REG]], Ltmp1@PAGEOFF + +; CHECK-LINUX-LABEL: t: +; CHECK-LINUX: adrp [[REG:x[0-9]+]], .Ltmp1 +; CHECK-LINUX: add {{x[0-9]+}}, [[REG]], :lo12:.Ltmp1 + +; CHECK-LARGE-LABEL: t: +; CHECK-LARGE: movz [[ADDR_REG:x[0-9]+]], #:abs_g3:[[DEST_LBL:.Ltmp[0-9]+]] +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g2_nc:[[DEST_LBL]] +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g1_nc:[[DEST_LBL]] +; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g0_nc:[[DEST_LBL]] + + %recover = alloca i64, align 8 + store volatile i64 ptrtoint (i8* blockaddress(@t, %mylabel) to i64), i64* %recover, align 8 + br label %mylabel + 
+mylabel: + %tmp = load volatile i64* %recover, align 8 + ret i64 %tmp +} diff --git a/test/CodeGen/ARM64/build-vector.ll b/test/CodeGen/ARM64/build-vector.ll new file mode 100644 index 0000000000..1d137ae6e6 --- /dev/null +++ b/test/CodeGen/ARM64/build-vector.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +; Check that building up a vector w/ only one non-zero lane initializes +; intelligently. +define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind { +; CHECK-LABEL: one_lane: +; CHECK: dup.16b v[[REG:[0-9]+]], wzr +; CHECK-NEXT: ins.b v[[REG]][0], w1 +; v and q are aliases, and str is prefered against st.16b when possible +; rdar://11246289 +; CHECK: str q[[REG]], [x0] +; CHECK: ret + %conv = trunc i32 %skip0 to i8 + %vset_lane = insertelement <16 x i8> , i8 %conv, i32 0 + %tmp = bitcast i32* %out_int to <4 x i32>* + %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32> + store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16 + ret void +} + +; Check that building a vector from floats doesn't insert an unnecessary +; copy for lane zero. +define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { +; CHECK-LABEL: foo: +; CHECK-NOT: ins.s v0[0], v0[0] +; CHECK: ins.s v0[1], v1[0] +; CHECK: ins.s v0[2], v2[0] +; CHECK: ins.s v0[3], v3[0] +; CHECK: ret + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float %b, i32 1 + %3 = insertelement <4 x float> %2, float %c, i32 2 + %4 = insertelement <4 x float> %3, float %d, i32 3 + ret <4 x float> %4 +} diff --git a/test/CodeGen/ARM64/call-tailcalls.ll b/test/CodeGen/ARM64/call-tailcalls.ll new file mode 100644 index 0000000000..487c1d9bec --- /dev/null +++ b/test/CodeGen/ARM64/call-tailcalls.ll @@ -0,0 +1,91 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s + +@t = weak global i32 ()* null +@x = external global i32, align 4 + +define void @t2() { +; CHECK-LABEL: t2: +; CHECK: adrp x[[GOTADDR:[0-9]+]], _t@GOTPAGE +; CHECK: ldr x[[ADDR:[0-9]+]], [x[[GOTADDR]], _t@GOTPAGEOFF] +; CHECK: ldr x[[DEST:[0-9]+]], [x[[ADDR]]] +; CHECK: br x[[DEST]] + %tmp = load i32 ()** @t + %tmp.upgrd.2 = tail call i32 %tmp() + ret void +} + +define void @t3() { +; CHECK-LABEL: t3: +; CHECK: b _t2 + tail call void @t2() + ret void +} + +define double @t4(double %a) nounwind readonly ssp { +; CHECK-LABEL: t4: +; CHECK: b _sin + %tmp = tail call double @sin(double %a) nounwind readonly + ret double %tmp +} + +define float @t5(float %a) nounwind readonly ssp { +; CHECK-LABEL: t5: +; CHECK: b _sinf + %tmp = tail call float @sinf(float %a) nounwind readonly + ret float %tmp +} + +define void @t7() nounwind { +; CHECK-LABEL: t7: +; CHECK: b _foo +; CHECK: b _bar + + br i1 undef, label %bb, label %bb1.lr.ph + +bb1.lr.ph: ; preds = %entry + tail call void @bar() nounwind + ret void + +bb: ; preds = %entry + tail call void @foo() nounwind + ret void +} + +define i32 @t8(i32 %x) nounwind ssp { +; CHECK-LABEL: t8: +; CHECK: b _a +; CHECK: b _b +; CHECK: b _c + %and = and i32 %x, 1 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call i32 @a(i32 %x) nounwind + br label %return + +if.end: ; preds = %entry + %and1 = and i32 %x, 2 + %tobool2 = icmp eq i32 %and1, 0 + br i1 %tobool2, label %if.end5, label %if.then3 + +if.then3: ; preds = %if.end + %call4 = tail call i32 @b(i32 %x) nounwind + br label %return + +if.end5: ; preds = %if.end + %call6 = tail call i32 @c(i32 %x) nounwind + br label %return 
+ +return: ; preds = %if.end5, %if.then3, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [ %call6, %if.end5 ] + ret i32 %retval.0 +} + +declare float @sinf(float) nounwind readonly +declare double @sin(double) nounwind readonly +declare void @bar() nounwind +declare void @foo() nounwind +declare i32 @a(i32) +declare i32 @b(i32) +declare i32 @c(i32) diff --git a/test/CodeGen/ARM64/cast-opt.ll b/test/CodeGen/ARM64/cast-opt.ll new file mode 100644 index 0000000000..3d7f25773a --- /dev/null +++ b/test/CodeGen/ARM64/cast-opt.ll @@ -0,0 +1,31 @@ +; RUN: llc -O3 -march=arm64 -mtriple arm64-apple-ios5.0.0 < %s | FileCheck %s +; +; Zero truncation is not necessary when the values are extended properly +; already. + +@block = common global i8* null, align 8 + +define zeroext i8 @foo(i32 %i1, i32 %i2) { +; CHECK-LABEL: foo: +; CHECK: csinc +; CHECK-NOT: and +entry: + %idxprom = sext i32 %i1 to i64 + %0 = load i8** @block, align 8 + %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom + %1 = load i8* %arrayidx, align 1 + %idxprom1 = sext i32 %i2 to i64 + %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1 + %2 = load i8* %arrayidx2, align 1 + %cmp = icmp eq i8 %1, %2 + br i1 %cmp, label %return, label %if.then + +if.then: ; preds = %entry + %cmp7 = icmp ugt i8 %1, %2 + %conv9 = zext i1 %cmp7 to i8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i8 [ %conv9, %if.then ], [ 1, %entry ] + ret i8 %retval.0 +} diff --git a/test/CodeGen/ARM64/ccmp-heuristics.ll b/test/CodeGen/ARM64/ccmp-heuristics.ll new file mode 100644 index 0000000000..5575997e53 --- /dev/null +++ b/test/CodeGen/ARM64/ccmp-heuristics.ll @@ -0,0 +1,190 @@ +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp | FileCheck %s +target triple = "arm64-apple-ios7.0.0" + +@channelColumns = external global i64 +@channelTracks = external global i64 +@mazeRoute = external hidden unnamed_addr global i8*, align 8 +@TOP = external global i64* +@BOT = external global i64* +@netsAssign = external global i64* + +; Function from yacr2/maze.c +; The branch at the end of %if.then is driven by %cmp5 and %cmp6. +; Isel converts the and i1 into two branches, and arm64-ccmp should not convert +; it back again. %cmp6 has much higher latency than %cmp5. 
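+; A rough sketch of the shape the heuristic looks at (names hypothetical,
+; not taken from the function below):
+;   %slow = icmp ugt i64 %loaded, 1   ; feeds off a chain of dependent loads
+;   %fast = icmp ugt i64 %index, 1    ; operand is immediately available
+;   %both = and i1 %fast, %slow
+;   br i1 %both, label %then, label %else
+; ISel lowers %both as two conditional branches; recombining them into a
+; cmp/ccmp pair would make the cheap test wait on the slow one, so the
+; heuristic keeps the branches split here.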
+; CHECK: Maze1 +; CHECK: %if.then +; CHECK: cmp x{{[0-9]+}}, #2 +; CHECK-NEXT b.cc +; CHECK: %if.then +; CHECK: cmp x{{[0-9]+}}, #2 +; CHECK-NEXT b.cc +define i32 @Maze1() nounwind ssp { +entry: + %0 = load i64* @channelColumns, align 8, !tbaa !0 + %cmp90 = icmp eq i64 %0, 0 + br i1 %cmp90, label %for.end, label %for.body + +for.body: ; preds = %for.inc, %entry + %1 = phi i64 [ %0, %entry ], [ %37, %for.inc ] + %i.092 = phi i64 [ 1, %entry ], [ %inc53, %for.inc ] + %numLeft.091 = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ] + %2 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx = getelementptr inbounds i8* %2, i64 %i.092 + %3 = load i8* %arrayidx, align 1, !tbaa !1 + %tobool = icmp eq i8 %3, 0 + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %4 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx1 = getelementptr inbounds i64* %4, i64 %i.092 + %5 = load i64* %arrayidx1, align 8, !tbaa !0 + %6 = load i64** @netsAssign, align 8, !tbaa !3 + %arrayidx2 = getelementptr inbounds i64* %6, i64 %5 + %7 = load i64* %arrayidx2, align 8, !tbaa !0 + %8 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx3 = getelementptr inbounds i64* %8, i64 %i.092 + %9 = load i64* %arrayidx3, align 8, !tbaa !0 + %arrayidx4 = getelementptr inbounds i64* %6, i64 %9 + %10 = load i64* %arrayidx4, align 8, !tbaa !0 + %cmp5 = icmp ugt i64 %i.092, 1 + %cmp6 = icmp ugt i64 %10, 1 + %or.cond = and i1 %cmp5, %cmp6 + br i1 %or.cond, label %land.lhs.true7, label %if.else + +land.lhs.true7: ; preds = %if.then + %11 = load i64* @channelTracks, align 8, !tbaa !0 + %add = add i64 %11, 1 + %call = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add, i64 %10, i64 0, i64 %7, i32 -1, i32 -1) + %tobool8 = icmp eq i32 %call, 0 + br i1 %tobool8, label %land.lhs.true7.if.else_crit_edge, label %if.then9 + +land.lhs.true7.if.else_crit_edge: ; preds = %land.lhs.true7 + %.pre = load i64* @channelColumns, align 8, !tbaa !0 + br label %if.else + +if.then9: ; preds = %land.lhs.true7 + %12 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx10 = getelementptr inbounds i8* %12, i64 %i.092 + store i8 0, i8* %arrayidx10, align 1, !tbaa !1 + %13 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx11 = getelementptr inbounds i64* %13, i64 %i.092 + %14 = load i64* %arrayidx11, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %14) + %15 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx12 = getelementptr inbounds i64* %15, i64 %i.092 + %16 = load i64* %arrayidx12, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %16) + br label %for.inc + +if.else: ; preds = %land.lhs.true7.if.else_crit_edge, %if.then + %17 = phi i64 [ %.pre, %land.lhs.true7.if.else_crit_edge ], [ %1, %if.then ] + %cmp13 = icmp ult i64 %i.092, %17 + %or.cond89 = and i1 %cmp13, %cmp6 + br i1 %or.cond89, label %land.lhs.true16, label %if.else24 + +land.lhs.true16: ; preds = %if.else + %18 = load i64* @channelTracks, align 8, !tbaa !0 + %add17 = add i64 %18, 1 + %call18 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add17, i64 %10, i64 0, i64 %7, i32 1, i32 -1) + %tobool19 = icmp eq i32 %call18, 0 + br i1 %tobool19, label %if.else24, label %if.then20 + +if.then20: ; preds = %land.lhs.true16 + %19 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx21 = getelementptr inbounds i8* %19, i64 %i.092 + store i8 0, i8* %arrayidx21, align 1, !tbaa !1 + %20 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx22 = getelementptr inbounds i64* %20, i64 %i.092 + %21 = load i64* %arrayidx22, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %21) + %22 = 
load i64** @BOT, align 8, !tbaa !3 + %arrayidx23 = getelementptr inbounds i64* %22, i64 %i.092 + %23 = load i64* %arrayidx23, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %23) + br label %for.inc + +if.else24: ; preds = %land.lhs.true16, %if.else + br i1 %cmp5, label %land.lhs.true26, label %if.else36 + +land.lhs.true26: ; preds = %if.else24 + %24 = load i64* @channelTracks, align 8, !tbaa !0 + %cmp27 = icmp ult i64 %7, %24 + br i1 %cmp27, label %land.lhs.true28, label %if.else36 + +land.lhs.true28: ; preds = %land.lhs.true26 + %add29 = add i64 %24, 1 + %call30 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add29, i64 %10, i32 -1, i32 1) + %tobool31 = icmp eq i32 %call30, 0 + br i1 %tobool31, label %if.else36, label %if.then32 + +if.then32: ; preds = %land.lhs.true28 + %25 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx33 = getelementptr inbounds i8* %25, i64 %i.092 + store i8 0, i8* %arrayidx33, align 1, !tbaa !1 + %26 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx34 = getelementptr inbounds i64* %26, i64 %i.092 + %27 = load i64* %arrayidx34, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %27) + %28 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx35 = getelementptr inbounds i64* %28, i64 %i.092 + %29 = load i64* %arrayidx35, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %29) + br label %for.inc + +if.else36: ; preds = %land.lhs.true28, %land.lhs.true26, %if.else24 + %30 = load i64* @channelColumns, align 8, !tbaa !0 + %cmp37 = icmp ult i64 %i.092, %30 + br i1 %cmp37, label %land.lhs.true38, label %if.else48 + +land.lhs.true38: ; preds = %if.else36 + %31 = load i64* @channelTracks, align 8, !tbaa !0 + %cmp39 = icmp ult i64 %7, %31 + br i1 %cmp39, label %land.lhs.true40, label %if.else48 + +land.lhs.true40: ; preds = %land.lhs.true38 + %add41 = add i64 %31, 1 + %call42 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add41, i64 %10, i32 1, i32 1) + %tobool43 = icmp eq i32 %call42, 0 + br i1 %tobool43, label %if.else48, label %if.then44 + +if.then44: ; preds = %land.lhs.true40 + %32 = load i8** @mazeRoute, align 8, !tbaa !3 + %arrayidx45 = getelementptr inbounds i8* %32, i64 %i.092 + store i8 0, i8* %arrayidx45, align 1, !tbaa !1 + %33 = load i64** @TOP, align 8, !tbaa !3 + %arrayidx46 = getelementptr inbounds i64* %33, i64 %i.092 + %34 = load i64* %arrayidx46, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %34) + %35 = load i64** @BOT, align 8, !tbaa !3 + %arrayidx47 = getelementptr inbounds i64* %35, i64 %i.092 + %36 = load i64* %arrayidx47, align 8, !tbaa !0 + tail call fastcc void @CleanNet(i64 %36) + br label %for.inc + +if.else48: ; preds = %land.lhs.true40, %land.lhs.true38, %if.else36 + %inc = add nsw i32 %numLeft.091, 1 + br label %for.inc + +for.inc: ; preds = %if.else48, %if.then44, %if.then32, %if.then20, %if.then9, %for.body + %numLeft.1 = phi i32 [ %numLeft.091, %if.then9 ], [ %numLeft.091, %if.then20 ], [ %numLeft.091, %if.then32 ], [ %numLeft.091, %if.then44 ], [ %inc, %if.else48 ], [ %numLeft.091, %for.body ] + %inc53 = add i64 %i.092, 1 + %37 = load i64* @channelColumns, align 8, !tbaa !0 + %cmp = icmp ugt i64 %inc53, %37 + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + %numLeft.0.lcssa = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ] + ret i32 %numLeft.0.lcssa +} + +; Materializable +declare hidden fastcc i32 @Maze1Mech(i64, i64, i64, i64, i64, i32, i32) nounwind ssp + +; Materializable +declare hidden fastcc void @CleanNet(i64) nounwind ssp + +!0 = 
metadata !{metadata !"long", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} diff --git a/test/CodeGen/ARM64/ccmp.ll b/test/CodeGen/ARM64/ccmp.ll new file mode 100644 index 0000000000..79e6f94e3f --- /dev/null +++ b/test/CodeGen/ARM64/ccmp.ll @@ -0,0 +1,289 @@ +; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp -arm64-stress-ccmp | FileCheck %s +target triple = "arm64-apple-ios" + +; CHECK: single_same +; CHECK: cmp w0, #5 +; CHECK-NEXT: ccmp w1, #17, #4, ne +; CHECK-NEXT: b.ne +; CHECK: %if.then +; CHECK: bl _foo +; CHECK: %if.end +define i32 @single_same(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + %cmp1 = icmp eq i32 %b, 17 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Different condition codes for the two compares. +; CHECK: single_different +; CHECK: cmp w0, #6 +; CHECK-NEXT: ccmp w1, #17, #0, ge +; CHECK-NEXT: b.eq +; CHECK: %if.then +; CHECK: bl _foo +; CHECK: %if.end +define i32 @single_different(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp sle i32 %a, 5 + %cmp1 = icmp ne i32 %b, 17 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Second block clobbers the flags, can't convert (easily). +; CHECK: single_flagclobber +; CHECK: cmp +; CHECK: b.eq +; CHECK: cmp +; CHECK: b.gt +define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + br i1 %cmp, label %if.then, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp1 = icmp slt i32 %b, 7 + %mul = shl nsw i32 %b, 1 + %add = add nsw i32 %b, 1 + %cond = select i1 %cmp1, i32 %mul, i32 %add + %cmp2 = icmp slt i32 %cond, 17 + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %lor.lhs.false, %entry + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false + ret i32 7 +} + +; Second block clobbers the flags and ends with a tbz terminator. +; CHECK: single_flagclobber_tbz +; CHECK: cmp +; CHECK: b.eq +; CHECK: cmp +; CHECK: tbz +define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + br i1 %cmp, label %if.then, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %cmp1 = icmp slt i32 %b, 7 + %mul = shl nsw i32 %b, 1 + %add = add nsw i32 %b, 1 + %cond = select i1 %cmp1, i32 %mul, i32 %add + %and = and i32 %cond, 8 + %cmp2 = icmp ne i32 %and, 0 + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %lor.lhs.false, %entry + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: ; preds = %if.then, %lor.lhs.false + ret i32 7 +} + +; Speculatively execute division by zero. +; The sdiv/udiv instructions do not trap when the divisor is zero, so they are +; safe to speculate. 
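+; In other words, hoisting the sdiv above its guard is sound on AArch64: a
+; zero divisor produces zero rather than a trap. That lets the two compares
+; collapse into a conditional-compare sequence, e.g. (a sketch; register
+; names and encoding assumed, not checked by this test):
+;   sdiv w8, w1, w0
+;   cmp  w0, #0
+;   ccmp w8, #17, #4, gt
+;   b.ge LBB_if_end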
+; CHECK: speculate_division +; CHECK-NOT: cmp +; CHECK: sdiv +; CHECK: cmp +; CHECK-NEXT: ccmp +define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: + %div = sdiv i32 %b, %a + %cmp1 = icmp slt i32 %div, 17 + br i1 %cmp1, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Floating point compare. +; CHECK: single_fcmp +; CHECK: cmp +; CHECK-NOT: b. +; CHECK: fccmp {{.*}}, #8, ge +; CHECK: b.lt +define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { +entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: + %conv = sitofp i32 %a to float + %div = fdiv float %b, %conv + %cmp1 = fcmp oge float %div, 1.700000e+01 + br i1 %cmp1, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Chain multiple compares. +; CHECK: multi_different +; CHECK: cmp +; CHECK: ccmp +; CHECK: ccmp +; CHECK: b. +define void @multi_different(i32 %a, i32 %b, i32 %c) nounwind ssp { +entry: + %cmp = icmp sgt i32 %a, %b + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: + %div = sdiv i32 %b, %a + %cmp1 = icmp eq i32 %div, 5 + %cmp4 = icmp sgt i32 %div, %c + %or.cond = and i1 %cmp1, %cmp4 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret void +} + +; Convert a cbz in the head block. +; CHECK: cbz_head +; CHECK: cmp w0, #0 +; CHECK: ccmp +define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp ne i32 %b, 17 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Check that the immediate operand is in range. The ccmp instruction encodes a +; smaller range of immediates than subs/adds. +; The ccmp immediates must be in the range 0-31. +; CHECK: immediate_range +; CHECK-NOT: ccmp +define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 5 + %cmp1 = icmp eq i32 %b, 32 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Convert a cbz in the second block. +; CHECK: cbz_second +; CHECK: cmp w0, #0 +; CHECK: ccmp w1, #0, #0, ne +; CHECK: b.eq +define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp ne i32 %b, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} + +; Convert a cbnz in the second block. +; CHECK: cbnz_second +; CHECK: cmp w0, #0 +; CHECK: ccmp w1, #0, #4, ne +; CHECK: b.ne +define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp { +entry: + %cmp = icmp eq i32 %a, 0 + %cmp1 = icmp eq i32 %b, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.end + +if.then: + %call = tail call i32 @foo() nounwind + br label %if.end + +if.end: + ret i32 7 +} +declare i32 @foo() + +%str1 = type { %str2 } +%str2 = type { [24 x i8], i8*, i32, %str1*, i32, [4 x i8], %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, i8*, i8, i8*, %str1*, i8* } + +; Test case distilled from 126.gcc. 
+; The phi in sw.bb.i.i gets multiple operands for the %entry predecessor. +; CHECK: build_modify_expr +define void @build_modify_expr() nounwind ssp { +entry: + switch i32 undef, label %sw.bb.i.i [ + i32 69, label %if.end85 + i32 70, label %if.end85 + i32 71, label %if.end85 + i32 72, label %if.end85 + i32 73, label %if.end85 + i32 105, label %if.end85 + i32 106, label %if.end85 + ] + +if.end85: + ret void + +sw.bb.i.i: + %ref.tr.i.i = phi %str1* [ %0, %sw.bb.i.i ], [ undef, %entry ] + %operands.i.i = getelementptr inbounds %str1* %ref.tr.i.i, i64 0, i32 0, i32 2 + %arrayidx.i.i = bitcast i32* %operands.i.i to %str1** + %0 = load %str1** %arrayidx.i.i, align 8 + %code1.i.i.phi.trans.insert = getelementptr inbounds %str1* %0, i64 0, i32 0, i32 0, i64 16 + br label %sw.bb.i.i +} diff --git a/test/CodeGen/ARM64/coalesce-ext.ll b/test/CodeGen/ARM64/coalesce-ext.ll new file mode 100644 index 0000000000..9e8d08e055 --- /dev/null +++ b/test/CodeGen/ARM64/coalesce-ext.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=arm64 -mtriple=arm64-apple-darwin < %s | FileCheck %s +; Check that the peephole optimizer knows about sext and zext instructions. +; CHECK: test1sext +define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { + %C = add i64 %A, %B + ; CHECK: add x[[SUM:[0-9]+]], x0, x1 + %D = trunc i64 %C to i32 + %E = shl i64 %C, 32 + %F = ashr i64 %E, 32 + ; CHECK: sxtw x[[EXT:[0-9]+]], x[[SUM]] + store volatile i64 %F, i64 *%P2 + ; CHECK: str x[[EXT]] + store volatile i32 %D, i32* %P + ; Reuse low bits of extended register, don't extend live range of SUM. + ; CHECK: str w[[SUM]] + ret i32 %D +} diff --git a/test/CodeGen/ARM64/code-model-large-abs.ll b/test/CodeGen/ARM64/code-model-large-abs.ll new file mode 100644 index 0000000000..264da2da25 --- /dev/null +++ b/test/CodeGen/ARM64/code-model-large-abs.ll @@ -0,0 +1,72 @@ +; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large < %s | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define i8* @global_addr() { +; CHECK-LABEL: global_addr: + ret i8* @var8 + ; The movz/movk calculation should end up returned directly in x0. 
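+  ; Worked example of the large-code-model sequence for an assumed address of
+  ; 0x0123456789abcdef (the test below only checks the relocation specifiers):
+  ;   movz x0, #0x0123, lsl #48   ; :abs_g3:     bits [63:48]
+  ;   movk x0, #0x4567, lsl #32   ; :abs_g2_nc:  bits [47:32]
+  ;   movk x0, #0x89ab, lsl #16   ; :abs_g1_nc:  bits [31:16]
+  ;   movk x0, #0xcdef            ; :abs_g0_nc:  bits [15:0]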
+; CHECK: movz x0, #:abs_g3:var8
+; CHECK: movk x0, #:abs_g2_nc:var8
+; CHECK: movk x0, #:abs_g1_nc:var8
+; CHECK: movk x0, #:abs_g0_nc:var8
+; CHECK-NEXT: ret
+}
+
+define i8 @global_i8() {
+; CHECK-LABEL: global_i8:
+  %val = load i8* @var8
+  ret i8 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var8
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var8
+; CHECK: ldrb w0, [x[[ADDR_REG]]]
+}
+
+define i16 @global_i16() {
+; CHECK-LABEL: global_i16:
+  %val = load i16* @var16
+  ret i16 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var16
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var16
+; CHECK: ldrh w0, [x[[ADDR_REG]]]
+}
+
+define i32 @global_i32() {
+; CHECK-LABEL: global_i32:
+  %val = load i32* @var32
+  ret i32 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var32
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var32
+; CHECK: ldr w0, [x[[ADDR_REG]]]
+}
+
+define i64 @global_i64() {
+; CHECK-LABEL: global_i64:
+  %val = load i64* @var64
+  ret i64 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var64
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var64
+; CHECK: ldr x0, [x[[ADDR_REG]]]
+}
+
+define <2 x i64> @constpool() {
+; CHECK-LABEL: constpool:
+  ret <2 x i64>
+
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:[[CPADDR:.LCPI[0-9]+_[0-9]+]]
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:[[CPADDR]]
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:[[CPADDR]]
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:[[CPADDR]]
+; CHECK: ldr q0, [x[[ADDR_REG]]]
+}
diff --git a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll b/test/CodeGen/ARM64/collect-loh-garbage-crash.ll
new file mode 100644
index 0000000000..98cb625d2d
--- /dev/null
+++ b/test/CodeGen/ARM64/collect-loh-garbage-crash.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=arm64-apple-ios -O3 -arm64-collect-loh -arm64-collect-loh-bb-only=true -arm64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
+; Check that the LOH analysis does not crash when the analysed chain
+; contains instructions that are filtered out.
+;
+; Before the fix for , these cases were removed
+; from the main container. Now, the deterministic container does not allow
+; arbitrary values to be removed, so we have to live with garbage values.
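+; For reference, the kind of hint the CHECK-NOT below guards against would
+; annotate a GOT-indirect load chain roughly like this (labels assumed):
+;   Lloh0: adrp x8, _pH4ISPDevice@GOTPAGE
+;   Lloh1: ldr  x8, [x8, _pH4ISPDevice@GOTPAGEOFF]
+;   Lloh2: ldr  x8, [x8]
+;          .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2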
+;
+
+%"class.H4ISP::H4ISPDevice" = type { i32 (%"class.H4ISP::H4ISPDevice"*, i32, i8*, i8*)*, i8*, i32*, %"class.H4ISP::H4ISPCameraManager"* }
+
+%"class.H4ISP::H4ISPCameraManager" = type opaque
+
+declare i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"*)
+
+@pH4ISPDevice = hidden global %"class.H4ISP::H4ISPDevice"* null, align 8
+
+; CHECK-LABEL: _foo:
+; CHECK: ret
+; CHECK-NOT: .loh AdrpLdrGotLdr
+define void @foo() {
+entry:
+  br label %if.then83
+if.then83: ; preds = %if.end81
+  %tmp = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+  %call84 = call i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"* %tmp) #19
+  tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"()
+  %tmp2 = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+  tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x28}"()
+  %pCameraManager.i268 = getelementptr inbounds %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3
+  %tmp3 = load %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8
+  %tobool.i269 = icmp eq %"class.H4ISP::H4ISPCameraManager"* %tmp3, null
+  br i1 %tobool.i269, label %if.then83, label %end
+end:
+  ret void
+}
+
diff --git a/test/CodeGen/ARM64/collect-loh-str.ll b/test/CodeGen/ARM64/collect-loh-str.ll
new file mode 100644
index 0000000000..fc63f8bcc2
--- /dev/null
+++ b/test/CodeGen/ARM64/collect-loh-str.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s
+; Test case for .
+; AdrpAddStr cannot be used when the store uses the same
+; register as address and value. Indeed, the related
+; if applied, may completely remove the definition or
+; at least provide a wrong one (with the offset folded
+; into the definition).
+
+%struct.anon = type { i32*, i32** }
+
+@pptp_wan_head = internal global %struct.anon zeroinitializer, align 8
+
+; CHECK-LABEL: _pptp_wan_init
+; CHECK: ret
+; CHECK-NOT: AdrpAddStr
+define i32 @pptp_wan_init() {
+entry:
+  store i32* null, i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), align 8
+  store i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), i32*** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 1), align 8
+  ret i32 0
+}
+
+
diff --git a/test/CodeGen/ARM64/collect-loh.ll b/test/CodeGen/ARM64/collect-loh.ll
new file mode 100644
index 0000000000..08ab0620b8
--- /dev/null
+++ b/test/CodeGen/ARM64/collect-loh.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s
+
+@a = internal unnamed_addr global i32 0, align 4
+@b = external global i32
+
+; Function Attrs: noinline nounwind ssp
+define void @foo(i32 %t) {
+entry:
+  %tmp = load i32* @a, align 4
+  %add = add nsw i32 %tmp, %t
+  store i32 %add, i32* @a, align 4
+  ret void
+}
+
+; Function Attrs: nounwind ssp
+; Testcase for , AdrpAdrp reuse is valid only when the first adrp
+; dominates the second.
+; The first adrp comes from the loading of 'a' and the second from the loading of 'b'.
+; 'a' is loaded in if.then, 'b' in if.end4, and if.then does not dominate if.end4.
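+; For reference, a valid AdrpAdrp hint pairs two page computations so the
+; linker may reuse the first one for the second, roughly (labels assumed):
+;   Lloh0: adrp x8, _a@PAGE
+;          ...
+;   Lloh1: adrp x9, _b@PAGE
+;          .loh AdrpAdrp Lloh0, Lloh1
+; Emitting it here would be wrong because not every path to the adrp for 'b'
+; (in if.end4) goes through the adrp for 'a' (in if.then).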
+; CHECK-LABEL: _test +; CHECK: ret +; CHECK-NOT: .loh AdrpAdrp +define i32 @test(i32 %t) { +entry: + %cmp = icmp sgt i32 %t, 5 + br i1 %cmp, label %if.then, label %if.end4 + +if.then: ; preds = %entry + %tmp = load i32* @a, align 4 + %add = add nsw i32 %tmp, %t + %cmp1 = icmp sgt i32 %add, 12 + br i1 %cmp1, label %if.then2, label %if.end4 + +if.then2: ; preds = %if.then + tail call void @foo(i32 %add) + %tmp1 = load i32* @a, align 4 + br label %if.end4 + +if.end4: ; preds = %if.then2, %if.then, %entry + %t.addr.0 = phi i32 [ %tmp1, %if.then2 ], [ %t, %if.then ], [ %t, %entry ] + %tmp2 = load i32* @b, align 4 + %add5 = add nsw i32 %tmp2, %t.addr.0 + tail call void @foo(i32 %add5) + %tmp3 = load i32* @b, align 4 + %add6 = add nsw i32 %tmp3, %t.addr.0 + ret i32 %add6 +} diff --git a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S b/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S new file mode 100644 index 0000000000..250732d6e8 --- /dev/null +++ b/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S @@ -0,0 +1,17 @@ +; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o /dev/null %s + + .text + .globl _foo + .cfi_startproc +_foo: + stp x29, x30, [sp, #-16]! + .cfi_adjust_cfa_offset 16 + + ldp x29, x30, [sp], #16 + .cfi_adjust_cfa_offset -16 + .cfi_restore x29 + .cfi_restore x30 + + ret + + .cfi_endproc diff --git a/test/CodeGen/ARM64/complex-ret.ll b/test/CodeGen/ARM64/complex-ret.ll new file mode 100644 index 0000000000..93d50a5986 --- /dev/null +++ b/test/CodeGen/ARM64/complex-ret.ll @@ -0,0 +1,7 @@ +; RUN: llc -march=arm64 -o - %s | FileCheck %s + +define { i192, i192, i21, i192 } @foo(i192) { +; CHECK-LABEL: foo: +; CHECK: stp xzr, xzr, [x8] + ret { i192, i192, i21, i192 } {i192 0, i192 1, i21 2, i192 3} +} diff --git a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll b/test/CodeGen/ARM64/convert-v2f64-v2i32.ll new file mode 100644 index 0000000000..1a07c98655 --- /dev/null +++ b/test/CodeGen/ARM64/convert-v2f64-v2i32.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +; CHECK: fptosi_1 +; CHECK: fcvtzs.2d +; CHECK: xtn.2s +; CHECK: ret +define void @fptosi_1() nounwind noinline ssp { +entry: + %0 = fptosi <2 x double> undef to <2 x i32> + store <2 x i32> %0, <2 x i32>* undef, align 8 + ret void +} + +; CHECK: fptoui_1 +; CHECK: fcvtzu.2d +; CHECK: xtn.2s +; CHECK: ret +define void @fptoui_1() nounwind noinline ssp { +entry: + %0 = fptoui <2 x double> undef to <2 x i32> + store <2 x i32> %0, <2 x i32>* undef, align 8 + ret void +} + diff --git a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll b/test/CodeGen/ARM64/convert-v2i32-v2f64.ll new file mode 100644 index 0000000000..63129a4b83 --- /dev/null +++ b/test/CodeGen/ARM64/convert-v2i32-v2f64.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +define <2 x double> @f1(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: f1: +; CHECK: sshll.2d v0, v0, #0 +; CHECK-NEXT: scvtf.2d v0, v0 +; CHECK-NEXT: ret + %conv = sitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} +define <2 x double> @f2(<2 x i32> %v) nounwind readnone { +; CHECK-LABEL: f2: +; CHECK: ushll.2d v0, v0, #0 +; CHECK-NEXT: ucvtf.2d v0, v0 +; CHECK-NEXT: ret + %conv = uitofp <2 x i32> %v to <2 x double> + ret <2 x double> %conv +} + +; CHECK: autogen_SD19655 +; CHECK: scvtf +; CHECK: ret +define void @autogen_SD19655() { + %T = load <2 x i64>* undef + %F = sitofp <2 x i64> undef to <2 x float> + store <2 x float> %F, <2 x float>* undef + ret void +} + diff --git 
a/test/CodeGen/ARM64/copy-tuple.ll b/test/CodeGen/ARM64/copy-tuple.ll new file mode 100644 index 0000000000..6325c3f855 --- /dev/null +++ b/test/CodeGen/ARM64/copy-tuple.ll @@ -0,0 +1,146 @@ +; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s + +; The main purpose of this test is to find out whether copyPhysReg can deal with +; the memmove-like situation arising in tuples, where an early copy can clobber +; the value needed by a later one if the tuples overlap. + +; We use dummy inline asm to force LLVM to generate a COPY between the registers +; we want by clobbering all the others. + +define void @test_D1D2_from_D0D1(i8* %addr) #0 { +; CHECK-LABEL: test_D1D2_from_D0D1: +; CHECK: orr.8b v2, v1 +; CHECK: orr.8b v1, v0 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D0D1_from_D1D2(i8* %addr) #0 { +; CHECK-LABEL: test_D0D1_from_D1D2: +; CHECK: orr.8b v0, v1 +; CHECK: orr.8b v1, v2 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D0D1_from_D31D0(i8* %addr) #0 { +; CHECK-LABEL: test_D0D1_from_D31D0: +; CHECK: orr.8b v1, v0 +; CHECK: orr.8b v0, v31 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", 
"~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D31D0_from_D0D1(i8* %addr) #0 { +; CHECK-LABEL: test_D31D0_from_D0D1: +; CHECK: orr.8b v31, v0 +; CHECK: orr.8b v0, v1 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + + tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() + tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) + ret void +} + +define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 { +; CHECK-LABEL: test_D2D3D4_from_D0D1D2: +; CHECK: orr.8b v4, v2 +; CHECK: orr.8b v3, v1 +; CHECK: orr.8b v2, v0 +entry: + %addr_v8i8 = bitcast i8* %addr to <8 x i8>* + %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) + %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0 + %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1 + %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2 + + tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + + tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) + ret void +} + +define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 { +; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3: +; CHECK: orr.16b v0, v1 +; CHECK: orr.16b v1, v2 +; CHECK: orr.16b v2, v3 +entry: + %addr_v16i8 = bitcast i8* %addr to <16 x i8>* + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 + %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 + %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 + tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + + tail call void asm sideeffect "", 
"~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) + ret void +} + +define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 { +; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1: +; CHECK: orr.16b v4, v1 +; CHECK: orr.16b v3, v0 +; CHECK: orr.16b v2, v31 +; CHECK: orr.16b v1, v30 + %addr_v16i8 = bitcast i8* %addr to <16 x i8>* + %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) + %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 + %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 + %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 + %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3 + + tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"() + tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + + tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) + ret void +} + +declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) + +declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) +declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) +declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) +declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) diff --git a/test/CodeGen/ARM64/crc32.ll b/test/CodeGen/ARM64/crc32.ll new file mode 100644 index 0000000000..609eb44122 --- /dev/null +++ b/test/CodeGen/ARM64/crc32.ll @@ -0,0 +1,71 @@ +; RUN: llc -march=arm64 -o - %s | FileCheck %s + +define i32 @test_crc32b(i32 %cur, i8 %next) { +; CHECK-LABEL: test_crc32b: +; CHECK: crc32b w0, w0, w1 + %bits = zext i8 %next to i32 + %val = call i32 @llvm.arm64.crc32b(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32h(i32 %cur, i16 %next) { +; CHECK-LABEL: test_crc32h: +; CHECK: crc32h w0, w0, w1 + %bits = zext i16 %next to i32 + %val = call i32 @llvm.arm64.crc32h(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32w(i32 %cur, i32 %next) { +; CHECK-LABEL: test_crc32w: +; CHECK: crc32w w0, w0, w1 + %val = call i32 @llvm.arm64.crc32w(i32 %cur, i32 %next) + ret i32 %val +} + +define i32 @test_crc32x(i32 %cur, i64 %next) { +; CHECK-LABEL: test_crc32x: +; CHECK: crc32x w0, w0, x1 + %val = call i32 @llvm.arm64.crc32x(i32 %cur, i64 %next) + ret i32 %val +} + +define i32 @test_crc32cb(i32 %cur, i8 %next) { +; CHECK-LABEL: test_crc32cb: +; 
CHECK: crc32cb w0, w0, w1 + %bits = zext i8 %next to i32 + %val = call i32 @llvm.arm64.crc32cb(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32ch(i32 %cur, i16 %next) { +; CHECK-LABEL: test_crc32ch: +; CHECK: crc32ch w0, w0, w1 + %bits = zext i16 %next to i32 + %val = call i32 @llvm.arm64.crc32ch(i32 %cur, i32 %bits) + ret i32 %val +} + +define i32 @test_crc32cw(i32 %cur, i32 %next) { +; CHECK-LABEL: test_crc32cw: +; CHECK: crc32cw w0, w0, w1 + %val = call i32 @llvm.arm64.crc32cw(i32 %cur, i32 %next) + ret i32 %val +} + +define i32 @test_crc32cx(i32 %cur, i64 %next) { +; CHECK-LABEL: test_crc32cx: +; CHECK: crc32cx w0, w0, x1 + %val = call i32 @llvm.arm64.crc32cx(i32 %cur, i64 %next) + ret i32 %val +} + +declare i32 @llvm.arm64.crc32b(i32, i32) +declare i32 @llvm.arm64.crc32h(i32, i32) +declare i32 @llvm.arm64.crc32w(i32, i32) +declare i32 @llvm.arm64.crc32x(i32, i64) + +declare i32 @llvm.arm64.crc32cb(i32, i32) +declare i32 @llvm.arm64.crc32ch(i32, i32) +declare i32 @llvm.arm64.crc32cw(i32, i32) +declare i32 @llvm.arm64.crc32cx(i32, i64) diff --git a/test/CodeGen/ARM64/crypto.ll b/test/CodeGen/ARM64/crypto.ll new file mode 100644 index 0000000000..3804310287 --- /dev/null +++ b/test/CodeGen/ARM64/crypto.ll @@ -0,0 +1,135 @@ +; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s | FileCheck %s + +declare <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) +declare <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) +declare <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) + +define <16 x i8> @test_aese(<16 x i8> %data, <16 x i8> %key) { +; CHECK-LABEL: test_aese: +; CHECK: aese.16b v0, v1 + %res = call <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key) + ret <16 x i8> %res +} + +define <16 x i8> @test_aesd(<16 x i8> %data, <16 x i8> %key) { +; CHECK-LABEL: test_aesd: +; CHECK: aesd.16b v0, v1 + %res = call <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key) + ret <16 x i8> %res +} + +define <16 x i8> @test_aesmc(<16 x i8> %data) { +; CHECK-LABEL: test_aesmc: +; CHECK: aesmc.16b v0, v0 + %res = call <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data) + ret <16 x i8> %res +} + +define <16 x i8> @test_aesimc(<16 x i8> %data) { +; CHECK-LABEL: test_aesimc: +; CHECK: aesimc.16b v0, v0 + %res = call <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data) + ret <16 x i8> %res +} + +declare <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) +declare i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) +declare <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) +declare <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) + +define <4 x i32> @test_sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1c: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1c.4s q0, [[HASH_E]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + ret <4 x i32> %res +} + +; Incomplete removal of unnecessary FMOV instructions in intrinsic SHA1 +define <4 x i32> @test_sha1c_in_a_row(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1c_in_a_row: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1c.4s 
q[[SHA1RES:[0-9]+]], [[HASH_E]], v1 +; CHECK-NOT: fmov +; CHECK: sha1c.4s q0, s[[SHA1RES]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + %extract = extractelement <4 x i32> %res, i32 0 + %res2 = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk) + ret <4 x i32> %res2 +} + +define <4 x i32> @test_sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1p: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1p.4s q0, [[HASH_E]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { +; CHECK-LABEL: test_sha1m: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1m.4s q0, [[HASH_E]], v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) + ret <4 x i32> %res +} + +define i32 @test_sha1h(i32 %hash_e) { +; CHECK-LABEL: test_sha1h: +; CHECK: fmov [[HASH_E:s[0-9]+]], w0 +; CHECK: sha1h [[RES:s[0-9]+]], [[HASH_E]] +; CHECK: fmov w0, [[RES]] + %res = call i32 @llvm.arm64.crypto.sha1h(i32 %hash_e) + ret i32 %res +} + +define <4 x i32> @test_sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) { +; CHECK-LABEL: test_sha1su0: +; CHECK: sha1su0.4s v0, v1, v2 + %res = call <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) { +; CHECK-LABEL: test_sha1su1: +; CHECK: sha1su1.4s v0, v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) +declare <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) +declare <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + +define <4 x i32> @test_sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) { +; CHECK-LABEL: test_sha256h: +; CHECK: sha256h.4s q0, q1, v2 + %res = call <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) { +; CHECK-LABEL: test_sha256h2: +; CHECK: sha256h2.4s q0, q1, v2 + + %res = call <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) { +; CHECK-LABEL: test_sha256su0: +; CHECK: sha256su0.4s v0, v1 + %res = call <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) + ret <4 x i32> %res +} + +define <4 x i32> @test_sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) { +; CHECK-LABEL: test_sha256su1: +; CHECK: sha256su1.4s v0, v1, v2 + %res = call <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) + ret <4 x i32> %res +} diff --git a/test/CodeGen/ARM64/cse.ll b/test/CodeGen/ARM64/cse.ll new file mode 100644 index 0000000000..d98bfd6053 --- /dev/null +++ b/test/CodeGen/ARM64/cse.ll @@ -0,0 +1,59 @@ +; RUN: llc -O3 < %s | FileCheck %s +target triple = "arm64-apple-ios" + +; 
rdar://12462006
+; CSE between "icmp reg reg" and "sub reg reg".
+; Both can be in the same basic block or in different basic blocks.
+define i8* @t1(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
+entry:
+; CHECK-LABEL: t1:
+; CHECK: subs
+; CHECK-NOT: cmp
+; CHECK-NOT: sub
+; CHECK: b.ge
+; CHECK: sub
+; CHECK: sub
+; CHECK-NOT: sub
+; CHECK: ret
+  %0 = load i32* %offset, align 4
+  %cmp = icmp slt i32 %0, %size
+  %s = sub nsw i32 %0, %size
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  %sub = sub nsw i32 %0, %size
+  %s2 = sub nsw i32 %s, %size
+  %s3 = sub nsw i32 %sub, %s2
+  store i32 %s3, i32* %offset, align 4
+  %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+  br label %return
+
+return:
+  %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+  ret i8* %retval.0
+}
+
+; CSE between "icmp reg imm" and "sub reg imm".
+define i8* @t2(i8* %base, i32* nocapture %offset) nounwind {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: subs
+; CHECK-NOT: cmp
+; CHECK-NOT: sub
+; CHECK: b.lt
+; CHECK-NOT: sub
+; CHECK: ret
+  %0 = load i32* %offset, align 4
+  %cmp = icmp slt i32 %0, 1
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  %sub = sub nsw i32 %0, 1
+  store i32 %sub, i32* %offset, align 4
+  %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+  br label %return
+
+return:
+  %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+  ret i8* %retval.0
+}
diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/ARM64/csel.ll
new file mode 100644
index 0000000000..cbf1769897
--- /dev/null
+++ b/test/CodeGen/ARM64/csel.ll
@@ -0,0 +1,222 @@
+; RUN: llc -O3 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "arm64-unknown-unknown"
+
+; CHECK: foo1
+; CHECK: csinc w{{[0-9]+}}, w[[REG:[0-9]+]],
+; CHECK: w[[REG]], eq
+define i32 @foo1(i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+  %not.tobool = icmp ne i32 %c, 0
+  %add = zext i1 %not.tobool to i32
+  %b.add = add i32 %c, %b
+  %add1 = add i32 %b.add, %add
+  ret i32 %add1
+}
+
+; CHECK: foo2
+; CHECK: csneg w{{[0-9]+}}, w[[REG:[0-9]+]],
+; CHECK: w[[REG]], eq
+define i32 @foo2(i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+  %mul = sub i32 0, %b
+  %tobool = icmp eq i32 %c, 0
+  %b.mul = select i1 %tobool, i32 %b, i32 %mul
+  %add = add nsw i32 %b.mul, %c
+  ret i32 %add
+}
+
+; CHECK: foo3
+; CHECK: csinv w{{[0-9]+}}, w[[REG:[0-9]+]],
+; CHECK: w[[REG]], eq
+define i32 @foo3(i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+  %not.tobool = icmp ne i32 %c, 0
+  %xor = sext i1 %not.tobool to i32
+  %b.xor = xor i32 %xor, %b
+  %add = add nsw i32 %b.xor, %c
+  ret i32 %add
+}
+
+; rdar://11632325
+define i32 @foo4(i32 %a) nounwind ssp {
+; CHECK: foo4
+; CHECK: csneg
+; CHECK-NEXT: ret
+  %cmp = icmp sgt i32 %a, -1
+  %neg = sub nsw i32 0, %a
+  %cond = select i1 %cmp, i32 %a, i32 %neg
+  ret i32 %cond
+}
+
+define i32 @foo5(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: foo5
+; CHECK: subs
+; CHECK-NEXT: csneg
+; CHECK-NEXT: ret
+  %sub = sub nsw i32 %a, %b
+  %cmp = icmp sgt i32 %sub, -1
+  %sub3 = sub nsw i32 0, %sub
+  %cond = select i1 %cmp, i32 %sub, i32 %sub3
+  ret i32 %cond
+}
+
+; Make sure we can handle a branch instruction in optimizeCompare.
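+; That is, the flag-setting form of the subtraction should feed the branch
+; directly, with no separate compare, roughly (register and label names
+; assumed, not checked below):
+;   subs w8, w0, w1
+;   b.le LBB_else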
+define i32@foo6(i32 %a, i32 %b) nounwind ssp { +; CHECK: foo6 +; CHECK: b + %sub = sub nsw i32 %a, %b + %cmp = icmp sgt i32 %sub, 0 + br i1 %cmp, label %l.if, label %l.else + +l.if: + ret i32 1 + +l.else: + ret i32 %sub +} + +; If CPSR is used multiple times and V flag is used, we don't remove cmp. +define i32 @foo7(i32 %a, i32 %b) nounwind { +entry: +; CHECK-LABEL: foo7: +; CHECK: sub +; CHECK-next: adds +; CHECK-next: csneg +; CHECK-next: b + %sub = sub nsw i32 %a, %b + %cmp = icmp sgt i32 %sub, -1 + %sub3 = sub nsw i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub3 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %cmp2 = icmp slt i32 %sub, -1 + %sel = select i1 %cmp2, i32 %cond, i32 %a + ret i32 %sel + +if.else: + ret i32 %cond +} + +define i32 @foo8(i32 %v, i32 %a, i32 %b) nounwind readnone ssp { +entry: +; CHECK-LABEL: foo8: +; CHECK: cmp w0, #0 +; CHECK: csinv w0, w1, w2, ne + %tobool = icmp eq i32 %v, 0 + %neg = xor i32 -1, %b + %cond = select i1 %tobool, i32 %neg, i32 %a + ret i32 %cond +} + +define i32 @foo9(i32 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo9: +; CHECK: cmp w0, #0 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4 +; CHECK: csinv w0, w[[REG]], w[[REG]], ne + %tobool = icmp ne i32 %v, 0 + %cond = select i1 %tobool, i32 4, i32 -5 + ret i32 %cond +} + +define i64 @foo10(i64 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo10: +; CHECK: cmp x0, #0 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4 +; CHECK: csinv x0, x[[REG]], x[[REG]], ne + %tobool = icmp ne i64 %v, 0 + %cond = select i1 %tobool, i64 4, i64 -5 + ret i64 %cond +} + +define i32 @foo11(i32 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo11: +; CHECK: cmp w0, #0 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4 +; CHECK: csneg w0, w[[REG]], w[[REG]], ne + %tobool = icmp ne i32 %v, 0 + %cond = select i1 %tobool, i32 4, i32 -4 + ret i32 %cond +} + +define i64 @foo12(i64 %v) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo12: +; CHECK: cmp x0, #0 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4 +; CHECK: csneg x0, x[[REG]], x[[REG]], ne + %tobool = icmp ne i64 %v, 0 + %cond = select i1 %tobool, i64 4, i64 -4 + ret i64 %cond +} + +define i32 @foo13(i32 %v, i32 %a, i32 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo13: +; CHECK: cmp w0, #0 +; CHECK: csneg w0, w1, w2, ne + %tobool = icmp eq i32 %v, 0 + %sub = sub i32 0, %b + %cond = select i1 %tobool, i32 %sub, i32 %a + ret i32 %cond +} + +define i64 @foo14(i64 %v, i64 %a, i64 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo14: +; CHECK: cmp x0, #0 +; CHECK: csneg x0, x1, x2, ne + %tobool = icmp eq i64 %v, 0 + %sub = sub i64 0, %b + %cond = select i1 %tobool, i64 %sub, i64 %a + ret i64 %cond +} + +define i32 @foo15(i32 %a, i32 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo15: +; CHECK: cmp w0, w1 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1 +; CHECK: csinc w0, w[[REG]], w[[REG]], le + %cmp = icmp sgt i32 %a, %b + %. = select i1 %cmp, i32 2, i32 1 + ret i32 %. +} + +define i32 @foo16(i32 %a, i32 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo16: +; CHECK: cmp w0, w1 +; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1 +; CHECK: csinc w0, w[[REG]], w[[REG]], gt + %cmp = icmp sgt i32 %a, %b + %. = select i1 %cmp, i32 1, i32 2 + ret i32 %. +} + +define i64 @foo17(i64 %a, i64 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo17: +; CHECK: cmp x0, x1 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1 +; CHECK: csinc x0, x[[REG]], x[[REG]], le + %cmp = icmp sgt i64 %a, %b + %. 
= select i1 %cmp, i64 2, i64 1 + ret i64 %. +} + +define i64 @foo18(i64 %a, i64 %b) nounwind readnone optsize ssp { +entry: +; CHECK-LABEL: foo18: +; CHECK: cmp x0, x1 +; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1 +; CHECK: csinc x0, x[[REG]], x[[REG]], gt + %cmp = icmp sgt i64 %a, %b + %. = select i1 %cmp, i64 1, i64 2 + ret i64 %. +} diff --git a/test/CodeGen/ARM64/cvt.ll b/test/CodeGen/ARM64/cvt.ll new file mode 100644 index 0000000000..b55a42fdf8 --- /dev/null +++ b/test/CodeGen/ARM64/cvt.ll @@ -0,0 +1,401 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +; +; Floating-point scalar convert to signed integer (to nearest with ties to away) +; +define i32 @fcvtas_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtas_1w1s: +;CHECK: fcvtas w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtas_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtas_1x1s: +;CHECK: fcvtas x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtas_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtas_1w1d: +;CHECK: fcvtas w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtas_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtas_1x1d: +;CHECK: fcvtas x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtas.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtas.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtas.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtas.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer +; +define i32 @fcvtau_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtau_1w1s: +;CHECK: fcvtau w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtau_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtau_1x1s: +;CHECK: fcvtau x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtau_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtau_1w1d: +;CHECK: fcvtau w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtau_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtau_1x1d: +;CHECK: fcvtau x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtau.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtau.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtau.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtau.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (toward -Inf) +; +define i32 @fcvtms_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtms_1w1s: +;CHECK: fcvtms w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtms_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtms_1x1s: +;CHECK: fcvtms x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtms_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtms_1w1d: +;CHECK: fcvtms w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f64(double %A) + ret i32 %tmp3 +} + 
+define i64 @fcvtms_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtms_1x1d: +;CHECK: fcvtms x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtms.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtms.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtms.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtms.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (toward -Inf) +; +define i32 @fcvtmu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtmu_1w1s: +;CHECK: fcvtmu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtmu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtmu_1x1s: +;CHECK: fcvtmu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtmu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtmu_1w1d: +;CHECK: fcvtmu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtmu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtmu_1x1d: +;CHECK: fcvtmu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtmu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtmu.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtmu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtmu.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (to nearest with ties to even) +; +define i32 @fcvtns_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtns_1w1s: +;CHECK: fcvtns w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtns_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtns_1x1s: +;CHECK: fcvtns x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtns_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtns_1w1d: +;CHECK: fcvtns w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtns_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtns_1x1d: +;CHECK: fcvtns x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtns.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtns.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtns.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtns.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (to nearest with ties to even) +; +define i32 @fcvtnu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtnu_1w1s: +;CHECK: fcvtnu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtnu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtnu_1x1s: +;CHECK: fcvtnu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtnu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtnu_1w1d: +;CHECK: fcvtnu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtnu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtnu_1x1d: 
+;CHECK: fcvtnu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtnu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtnu.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtnu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtnu.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (toward +Inf) +; +define i32 @fcvtps_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtps_1w1s: +;CHECK: fcvtps w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtps_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtps_1x1s: +;CHECK: fcvtps x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtps_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtps_1w1d: +;CHECK: fcvtps w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtps_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtps_1x1d: +;CHECK: fcvtps x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtps.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtps.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtps.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtps.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (toward +Inf) +; +define i32 @fcvtpu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtpu_1w1s: +;CHECK: fcvtpu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtpu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtpu_1x1s: +;CHECK: fcvtpu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtpu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtpu_1w1d: +;CHECK: fcvtpu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtpu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtpu_1x1d: +;CHECK: fcvtpu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtpu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtpu.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtpu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtpu.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to signed integer (toward zero) +; +define i32 @fcvtzs_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtzs_1w1s: +;CHECK: fcvtzs w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtzs_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtzs_1x1s: +;CHECK: fcvtzs x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtzs_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtzs_1w1d: +;CHECK: fcvtzs w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtzs_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtzs_1x1d: +;CHECK: fcvtzs x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f64(double %A) + ret 
i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtzs.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzs.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtzs.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzs.i64.f64(double) nounwind readnone + +; +; Floating-point scalar convert to unsigned integer (toward zero) +; +define i32 @fcvtzu_1w1s(float %A) nounwind { +;CHECK-LABEL: fcvtzu_1w1s: +;CHECK: fcvtzu w0, s0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f32(float %A) + ret i32 %tmp3 +} + +define i64 @fcvtzu_1x1s(float %A) nounwind { +;CHECK-LABEL: fcvtzu_1x1s: +;CHECK: fcvtzu x0, s0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f32(float %A) + ret i64 %tmp3 +} + +define i32 @fcvtzu_1w1d(double %A) nounwind { +;CHECK-LABEL: fcvtzu_1w1d: +;CHECK: fcvtzu w0, d0 +;CHECK-NEXT: ret + %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f64(double %A) + ret i32 %tmp3 +} + +define i64 @fcvtzu_1x1d(double %A) nounwind { +;CHECK-LABEL: fcvtzu_1x1d: +;CHECK: fcvtzu x0, d0 +;CHECK-NEXT: ret + %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f64(double %A) + ret i64 %tmp3 +} + +declare i32 @llvm.arm64.neon.fcvtzu.i32.f32(float) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzu.i64.f32(float) nounwind readnone +declare i32 @llvm.arm64.neon.fcvtzu.i32.f64(double) nounwind readnone +declare i64 @llvm.arm64.neon.fcvtzu.i64.f64(double) nounwind readnone diff --git a/test/CodeGen/ARM64/dagcombiner-convergence.ll b/test/CodeGen/ARM64/dagcombiner-convergence.ll new file mode 100644 index 0000000000..a45e31320d --- /dev/null +++ b/test/CodeGen/ARM64/dagcombiner-convergence.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -o /dev/null +; rdar://10795250 +; DAGCombiner should converge. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64" +target triple = "arm64-apple-macosx10.8.0" + +define i64 @foo(i128 %Params.coerce, i128 %SelLocs.coerce) { +entry: + %tmp = lshr i128 %Params.coerce, 61 + %.tr38.i = trunc i128 %tmp to i64 + %mul.i = and i64 %.tr38.i, 4294967288 + %tmp1 = lshr i128 %SelLocs.coerce, 62 + %.tr.i = trunc i128 %tmp1 to i64 + %mul7.i = and i64 %.tr.i, 4294967292 + %add.i = add i64 %mul7.i, %mul.i + %conv.i.i = and i64 %add.i, 4294967292 + ret i64 %conv.i.i +} diff --git a/test/CodeGen/ARM64/dagcombiner-load-slicing.ll b/test/CodeGen/ARM64/dagcombiner-load-slicing.ll new file mode 100644 index 0000000000..0679014e59 --- /dev/null +++ b/test/CodeGen/ARM64/dagcombiner-load-slicing.ll @@ -0,0 +1,102 @@ +; RUN: llc -mtriple arm64-apple-ios -O3 -o - < %s | FileCheck %s +; + +%class.Complex = type { float, float } +%class.Complex_int = type { i32, i32 } +%class.Complex_long = type { i64, i64 } + +; CHECK-LABEL: @test +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3 +; CHECK: ldp [[CPLX1_I:s[0-9]+]], [[CPLX1_R:s[0-9]+]], {{\[}}[[BASE]]] +; CHECK: ldp [[CPLX2_I:s[0-9]+]], [[CPLX2_R:s[0-9]+]], {{\[}}[[BASE]], #64] +; CHECK: fadd {{s[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] +; CHECK: fadd {{s[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: ret +define void @test(%class.Complex* nocapture %out, i64 %out_start) { +entry: + %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start + %0 = bitcast %class.Complex* %arrayidx to i64* + %1 = load i64* %0, align 4 + %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32 + %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float + %t0.sroa.2.0.extract.shift = lshr i64 %1, 32 + %t0.sroa.2.0.extract.trunc = trunc i64 
%t0.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %t0.sroa.2.0.extract.trunc to float + %add = add i64 %out_start, 8 + %arrayidx2 = getelementptr inbounds %class.Complex* %out, i64 %add + %i.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 0 + %4 = load float* %i.i, align 4 + %add.i = fadd float %4, %2 + %retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0 + %r.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 1 + %5 = load float* %r.i, align 4 + %add5.i = fadd float %5, %3 + %retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1 + %ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>* + store <2 x float> %retval.sroa.0.4.vec.insert.i, <2 x float>* %ref.tmp.sroa.0.0.cast, align 4 + ret void +} + +; CHECK-LABEL: @test_int +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3 +; CHECK: ldp [[CPLX1_I:w[0-9]+]], [[CPLX1_R:w[0-9]+]], {{\[}}[[BASE]]] +; CHECK: ldp [[CPLX2_I:w[0-9]+]], [[CPLX2_R:w[0-9]+]], {{\[}}[[BASE]], #64] +; CHECK: add {{w[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] +; CHECK: add {{w[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: ret +define void @test_int(%class.Complex_int* nocapture %out, i64 %out_start) { +entry: + %arrayidx = getelementptr inbounds %class.Complex_int* %out, i64 %out_start + %0 = bitcast %class.Complex_int* %arrayidx to i64* + %1 = load i64* %0, align 4 + %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32 + %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to i32 + %t0.sroa.2.0.extract.shift = lshr i64 %1, 32 + %t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %t0.sroa.2.0.extract.trunc to i32 + %add = add i64 %out_start, 8 + %arrayidx2 = getelementptr inbounds %class.Complex_int* %out, i64 %add + %i.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0, i32 0 + %4 = load i32* %i.i, align 4 + %add.i = add i32 %4, %2 + %retval.sroa.0.0.vec.insert.i = insertelement <2 x i32> undef, i32 %add.i, i32 0 + %r.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0, i32 1 + %5 = load i32* %r.i, align 4 + %add5.i = add i32 %5, %3 + %retval.sroa.0.4.vec.insert.i = insertelement <2 x i32> %retval.sroa.0.0.vec.insert.i, i32 %add5.i, i32 1 + %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_int* %arrayidx to <2 x i32>* + store <2 x i32> %retval.sroa.0.4.vec.insert.i, <2 x i32>* %ref.tmp.sroa.0.0.cast, align 4 + ret void +} + +; CHECK-LABEL: @test_long +; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #4 +; CHECK: ldp [[CPLX1_I:x[0-9]+]], [[CPLX1_R:x[0-9]+]], {{\[}}[[BASE]]] +; CHECK: ldp [[CPLX2_I:x[0-9]+]], [[CPLX2_R:x[0-9]+]], {{\[}}[[BASE]], #128] +; CHECK: add {{x[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] +; CHECK: add {{x[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: ret +define void @test_long(%class.Complex_long* nocapture %out, i64 %out_start) { +entry: + %arrayidx = getelementptr inbounds %class.Complex_long* %out, i64 %out_start + %0 = bitcast %class.Complex_long* %arrayidx to i128* + %1 = load i128* %0, align 4 + %t0.sroa.0.0.extract.trunc = trunc i128 %1 to i64 + %2 = bitcast i64 %t0.sroa.0.0.extract.trunc to i64 + %t0.sroa.2.0.extract.shift = lshr i128 %1, 64 + %t0.sroa.2.0.extract.trunc = trunc i128 %t0.sroa.2.0.extract.shift to i64 + %3 = bitcast i64 %t0.sroa.2.0.extract.trunc to i64 + %add = add i64 %out_start, 8 + %arrayidx2 = getelementptr inbounds %class.Complex_long* %out, i64 %add + %i.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0, i32 0 + %4 = load i64* %i.i, align 4 + 
%add.i = add i64 %4, %2 + %retval.sroa.0.0.vec.insert.i = insertelement <2 x i64> undef, i64 %add.i, i32 0 + %r.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0, i32 1 + %5 = load i64* %r.i, align 4 + %add5.i = add i64 %5, %3 + %retval.sroa.0.4.vec.insert.i = insertelement <2 x i64> %retval.sroa.0.0.vec.insert.i, i64 %add5.i, i32 1 + %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_long* %arrayidx to <2 x i64>* + store <2 x i64> %retval.sroa.0.4.vec.insert.i, <2 x i64>* %ref.tmp.sroa.0.0.cast, align 4 + ret void +} diff --git a/test/CodeGen/ARM64/dup.ll b/test/CodeGen/ARM64/dup.ll new file mode 100644 index 0000000000..e65957522b --- /dev/null +++ b/test/CodeGen/ARM64/dup.ll @@ -0,0 +1,322 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s + +define <8 x i8> @v_dup8(i8 %A) nounwind { +;CHECK-LABEL: v_dup8: +;CHECK: dup.8b + %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0 + %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1 + %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2 + %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3 + %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4 + %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5 + %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6 + %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7 + ret <8 x i8> %tmp8 +} + +define <4 x i16> @v_dup16(i16 %A) nounwind { +;CHECK-LABEL: v_dup16: +;CHECK: dup.4h + %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0 + %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1 + %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2 + %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @v_dup32(i32 %A) nounwind { +;CHECK-LABEL: v_dup32: +;CHECK: dup.2s + %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0 + %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1 + ret <2 x i32> %tmp2 +} + +define <2 x float> @v_dupfloat(float %A) nounwind { +;CHECK-LABEL: v_dupfloat: +;CHECK: dup.2s + %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0 + %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1 + ret <2 x float> %tmp2 +} + +define <16 x i8> @v_dupQ8(i8 %A) nounwind { +;CHECK-LABEL: v_dupQ8: +;CHECK: dup.16b + %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0 + %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1 + %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2 + %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3 + %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4 + %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5 + %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6 + %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7 + %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8 + %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9 + %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10 + %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11 + %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12 + %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13 + %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14 + %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15 + ret <16 x i8> %tmp16 +} + +define <8 x i16> @v_dupQ16(i16 %A) nounwind { +;CHECK-LABEL: v_dupQ16: +;CHECK: dup.8h + %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0 + %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1 + %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2 + %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3 + %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, 
i32 4 + %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5 + %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6 + %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7 + ret <8 x i16> %tmp8 +} + +define <4 x i32> @v_dupQ32(i32 %A) nounwind { +;CHECK-LABEL: v_dupQ32: +;CHECK: dup.4s + %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0 + %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1 + %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2 + %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @v_dupQfloat(float %A) nounwind { +;CHECK-LABEL: v_dupQfloat: +;CHECK: dup.4s + %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0 + %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1 + %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2 + %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3 + ret <4 x float> %tmp4 +} + +; Check to make sure it works with shuffles, too. + +define <8 x i8> @v_shuffledup8(i8 %A) nounwind { +;CHECK-LABEL: v_shuffledup8: +;CHECK: dup.8b + %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0 + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer + ret <8 x i8> %tmp2 +} + +define <4 x i16> @v_shuffledup16(i16 %A) nounwind { +;CHECK-LABEL: v_shuffledup16: +;CHECK: dup.4h + %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + ret <4 x i16> %tmp2 +} + +define <2 x i32> @v_shuffledup32(i32 %A) nounwind { +;CHECK-LABEL: v_shuffledup32: +;CHECK: dup.2s + %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + ret <2 x i32> %tmp2 +} + +define <2 x float> @v_shuffledupfloat(float %A) nounwind { +;CHECK-LABEL: v_shuffledupfloat: +;CHECK: dup.2s + %tmp1 = insertelement <2 x float> undef, float %A, i32 0 + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer + ret <2 x float> %tmp2 +} + +define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind { +;CHECK-LABEL: v_shuffledupQ8: +;CHECK: dup.16b + %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0 + %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %tmp2 +} + +define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind { +;CHECK-LABEL: v_shuffledupQ16: +;CHECK: dup.8h + %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0 + %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %tmp2 +} + +define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind { +;CHECK-LABEL: v_shuffledupQ32: +;CHECK: dup.4s + %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0 + %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %tmp2 +} + +define <4 x float> @v_shuffledupQfloat(float %A) nounwind { +;CHECK-LABEL: v_shuffledupQfloat: +;CHECK: dup.4s + %tmp1 = insertelement <4 x float> undef, float %A, i32 0 + %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %tmp2 +} + +define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind { +;CHECK-LABEL: vduplane8: +;CHECK: dup.8b + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind { +;CHECK-LABEL: vduplane16: +;CHECK: dup.4h + %tmp1 = load <4 x i16>* %A + %tmp2 = 
shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind { +;CHECK-LABEL: vduplane32: +;CHECK: dup.2s + %tmp1 = load <2 x i32>* %A + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 > + ret <2 x i32> %tmp2 +} + +define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind { +;CHECK-LABEL: vduplanefloat: +;CHECK: dup.2s + %tmp1 = load <2 x float>* %A + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 > + ret <2 x float> %tmp2 +} + +define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind { +;CHECK-LABEL: vduplaneQ8: +;CHECK: dup.16b + %tmp1 = load <8 x i8>* %A + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind { +;CHECK-LABEL: vduplaneQ16: +;CHECK: dup.8h + %tmp1 = load <4 x i16>* %A + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind { +;CHECK-LABEL: vduplaneQ32: +;CHECK: dup.4s + %tmp1 = load <2 x i32>* %A + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x i32> %tmp2 +} + +define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind { +;CHECK-LABEL: vduplaneQfloat: +;CHECK: dup.4s + %tmp1 = load <2 x float>* %A + %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > + ret <4 x float> %tmp2 +} + +define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: foo: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> + ret <2 x i64> %0 +} + +define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: bar: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> + ret <2 x i64> %0 +} + +define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: baz: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> + ret <2 x double> %0 +} + +define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone { +;CHECK-LABEL: qux: +;CHECK: dup.2d +entry: + %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> + ret <2 x double> %0 +} + +define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: f: +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ins.s v0[1], w1 +; CHECK-NEXT: ret + %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0 + %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1 + ret <2 x i32> %vecinit1 +} + +define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: g: +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ins.s v0[1], w1 +; CHECK-NEXT: ins.s v0[2], w1 +; CHECK-NEXT: ins.s v0[3], w0 +; CHECK-NEXT: ret + %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a, i32 3 + ret <4 x i32> %vecinit3 +} + +define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone { +; CHECK-LABEL: h: +; CHECK-NEXT: fmov d0, x0 +; 
CHECK-NEXT: ins.d v0[1], x1 +; CHECK-NEXT: ret + %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0 + %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1 + ret <2 x i64> %vecinit1 +} + +; We used to spot this as a BUILD_VECTOR implementable by dup, but assume that +; the single value needed was of the same type as the vector. This is false if +; the scalar corresponding to the vector type is illegal (e.g. a <4 x i16> +; BUILD_VECTOR will have an i32 as its source). In that case, the operation is +; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed. +define <4 x i16> @test_build_illegal(<4 x i32> %in) { +; CHECK-LABEL: test_build_illegal: +; CHECK: umov.s [[WTMP:w[0-9]+]], v0[3] +; CHECK: dup.4h v0, [[WTMP]] + %val = extractelement <4 x i32> %in, i32 3 + %smallval = trunc i32 %val to i16 + %vec = insertelement <4x i16> undef, i16 %smallval, i32 3 + + ret <4 x i16> %vec +} + +; We used to inherit an already extract_subvectored v4i16 from +; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing +; the formation of an indexed-by-7 MLS. +define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 { +; CHECK-LABEL: test_high_splat: +; CHECK: mls.4h v0, v1, v2[7] +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> + %mul = mul <4 x i16> %shuffle, %b + %sub = sub <4 x i16> %a, %mul + ret <4 x i16> %sub +} diff --git a/test/CodeGen/ARM64/early-ifcvt.ll b/test/CodeGen/ARM64/early-ifcvt.ll new file mode 100644 index 0000000000..a5c1e26c61 --- /dev/null +++ b/test/CodeGen/ARM64/early-ifcvt.ll @@ -0,0 +1,423 @@ +; RUN: llc < %s -stress-early-ifcvt | FileCheck %s +target triple = "arm64-apple-macosx" + +; CHECK: mm2 +define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp { +entry: + br label %do.body + +; CHECK: do.body +; Loop body has no branches before the backedge. 
+; CHECK-NOT: LBB +do.body: + %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ] + %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ] + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ] + %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ] + %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1 + %0 = load i32* %p.addr.0, align 4 + %cmp = icmp sgt i32 %0, %max.0 + br i1 %cmp, label %do.cond, label %if.else + +if.else: + %cmp1 = icmp slt i32 %0, %min.0 + %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0 + br label %do.cond + +do.cond: + %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ] + %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ] +; CHECK: cbnz + %dec = add i32 %n.addr.0, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: + %sub = sub nsw i32 %max.1, %min.1 + ret i32 %sub +} + +; CHECK-LABEL: fold_inc_true_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinc w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @fold_inc_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %inc = add nsw i32 %x, 1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %y, %eq_bb ], [ %inc, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inc_true_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinc x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @fold_inc_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inc = add nsw i64 %x, 1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %y, %eq_bb ], [ %inc, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_inc_false_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinc w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @fold_inc_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %inc = add nsw i32 %x, 1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %inc, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inc_false_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinc x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @fold_inc_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inc = add nsw i64 %x, 1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %inc, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_inv_true_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinv w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @fold_inv_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %inv = xor i32 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %y, %eq_bb ], [ %inv, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inv_true_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinv x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @fold_inv_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inv = xor i64 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %y, %eq_bb ], [ %inv, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_inv_false_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csinv w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @fold_inv_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp 
eq i32 %c, 1 + %inv = xor i32 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %inv, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_inv_false_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csinv x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @fold_inv_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %inv = xor i64 %x, -1 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %inv, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_neg_true_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csneg w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @fold_neg_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %neg = sub nsw i32 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %y, %eq_bb ], [ %neg, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_neg_true_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csneg x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @fold_neg_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %neg = sub nsw i64 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %y, %eq_bb ], [ %neg, %entry ] + ret i64 %cond +} + +; CHECK-LABEL: fold_neg_false_32: +; CHECK: {{subs.*wzr,|cmp}} w2, #1 +; CHECK-NEXT: csneg w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @fold_neg_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 1 + %neg = sub nsw i32 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %neg, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK-LABEL: fold_neg_false_64: +; CHECK: {{subs.*xzr,|cmp}} x2, #1 +; CHECK-NEXT: csneg x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @fold_neg_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 1 + %neg = sub nsw i64 0, %x + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %neg, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: cbnz_32 +; CHECK: {{subs.*wzr,|cmp}} w2, #0 +; CHECK-NEXT: csel w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @cbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: cbnz_64 +; CHECK: {{subs.*xzr,|cmp}} x2, #0 +; CHECK-NEXT: csel x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @cbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp eq i64 %c, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: cbz_32 +; CHECK: {{subs.*wzr,|cmp}} w2, #0 +; CHECK-NEXT: csel w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @cbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %tobool = icmp ne i32 %c, 0 + br i1 %tobool, label %ne_bb, label %done + +ne_bb: + br label %done + +done: + %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: cbz_64 +; CHECK: {{subs.*xzr,|cmp}} x2, #0 +; CHECK-NEXT: csel x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @cbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %tobool = icmp ne i64 %c, 0 + br i1 %tobool, label %ne_bb, label %done + 
+ne_bb: + br label %done + +done: + %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: tbnz_32 +; CHECK: {{ands.*xzr,|tst}} x2, #0x80 +; CHECK-NEXT: csel w0, w1, w0, ne +; CHECK-NEXT: ret +define i32 @tbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %mask = and i32 %c, 128 + %tobool = icmp eq i32 %mask, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: tbnz_64 +; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000 +; CHECK-NEXT: csel x0, x1, x0, ne +; CHECK-NEXT: ret +define i64 @tbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %mask = and i64 %c, 9223372036854775808 + %tobool = icmp eq i64 %mask, 0 + br i1 %tobool, label %eq_bb, label %done + +eq_bb: + br label %done + +done: + %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ] + ret i64 %cond +} + +; CHECK: tbz_32 +; CHECK: {{ands.*xzr,|tst}} x2, #0x80 +; CHECK-NEXT: csel w0, w1, w0, eq +; CHECK-NEXT: ret +define i32 @tbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp { +entry: + %mask = and i32 %c, 128 + %tobool = icmp ne i32 %mask, 0 + br i1 %tobool, label %ne_bb, label %done + +ne_bb: + br label %done + +done: + %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ] + ret i32 %cond +} + +; CHECK: tbz_64 +; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000 +; CHECK-NEXT: csel x0, x1, x0, eq +; CHECK-NEXT: ret +define i64 @tbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp { +entry: + %mask = and i64 %c, 9223372036854775808 + %tobool = icmp ne i64 %mask, 0 + br i1 %tobool, label %ne_bb, label %done + +ne_bb: + br label %done + +done: + %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ] + ret i64 %cond +} + +; This function from 175.vpr folds an ADDWri into a CSINC. +; Remember to clear the kill flag on the ADDWri. 
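+; There are no CHECK lines for this function; it is only expected to compile
+; cleanly under -stress-early-ifcvt.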
+define i32 @get_ytrack_to_xtracks() nounwind ssp { +entry: + br label %for.body + +for.body: + %x0 = load i32* undef, align 4 + br i1 undef, label %if.then.i146, label %is_sbox.exit155 + +if.then.i146: + %add8.i143 = add nsw i32 0, %x0 + %rem.i144 = srem i32 %add8.i143, %x0 + %add9.i145 = add i32 %rem.i144, 1 + br label %is_sbox.exit155 + +is_sbox.exit155: ; preds = %if.then.i146, %for.body + %seg_offset.0.i151 = phi i32 [ %add9.i145, %if.then.i146 ], [ undef, %for.body ] + %idxprom15.i152 = sext i32 %seg_offset.0.i151 to i64 + %arrayidx18.i154 = getelementptr inbounds i32* null, i64 %idxprom15.i152 + %x1 = load i32* %arrayidx18.i154, align 4 + br i1 undef, label %for.body51, label %for.body + +for.body51: ; preds = %is_sbox.exit155 + call fastcc void @get_switch_type(i32 %x1, i32 undef, i16 signext undef, i16 signext undef, i16* undef) + unreachable +} +declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, i16* nocapture) nounwind ssp diff --git a/test/CodeGen/ARM64/elf-calls.ll b/test/CodeGen/ARM64/elf-calls.ll new file mode 100644 index 0000000000..8c4020327b --- /dev/null +++ b/test/CodeGen/ARM64/elf-calls.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=arm64-linux-gnu - -r | FileCheck %s --check-prefix=CHECK-OBJ + +declare void @callee() + +define void @caller() { + call void @callee() + ret void +; CHECK-LABEL: caller: +; CHECK: bl callee +; CHECK-OBJ: R_AARCH64_CALL26 callee +} + +define void @tail_caller() { + tail call void @callee() + ret void +; CHECK-LABEL: tail_caller: +; CHECK: b callee +; CHECK-OBJ: R_AARCH64_JUMP26 callee +} diff --git a/test/CodeGen/ARM64/elf-constpool.ll b/test/CodeGen/ARM64/elf-constpool.ll new file mode 100644 index 0000000000..95d334376b --- /dev/null +++ b/test/CodeGen/ARM64/elf-constpool.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -O0 -o - %s | FileCheck %s + +; O0 checked for fastisel purposes. It has a separate path which +; creates a constpool entry for floating values. 
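+; Either way, the 3.14159 literal should end up in an .LCPI constant-pool entry
+; and be loaded with the adrp/ldr pair checked below.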
+ +define double @needs_const() { + ret double 3.14159 +; CHECK: .LCPI0_0: + +; CHECK: adrp {{x[0-9]+}}, .LCPI0_0 +; CHECK: ldr d0, [{{x[0-9]+}}, :lo12:.LCPI0_0] +} diff --git a/test/CodeGen/ARM64/elf-globals.ll b/test/CodeGen/ARM64/elf-globals.ll new file mode 100644 index 0000000000..598c96ae48 --- /dev/null +++ b/test/CodeGen/ARM64/elf-globals.ll @@ -0,0 +1,115 @@ +; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-PIC +; RUN: llc -mtriple=arm64-linux-gnu -O0 -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-FAST-PIC + +@var8 = external global i8, align 1 +@var16 = external global i16, align 2 +@var32 = external global i32, align 4 +@var64 = external global i64, align 8 + +define i8 @test_i8(i8 %new) { + %val = load i8* @var8, align 1 + store i8 %new, i8* @var8 + ret i8 %val +; CHECK-LABEL: test_i8: +; CHECK: adrp x[[HIREG:[0-9]+]], var8 +; CHECK: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8] +; CHECK: strb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8] + +; CHECK-PIC-LABEL: test_i8: +; CHECK-PIC: adrp x[[HIREG:[0-9]+]], :got:var8 +; CHECK-PIC: ldr x[[VAR_ADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8] +; CHECK-PIC: ldrb {{w[0-9]+}}, [x[[VAR_ADDR]]] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var8 +; CHECK-FAST: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8] + +; CHECK-FAST-PIC: adrp x[[HIREG:[0-9]+]], :got:var8 +; CHECK-FAST-PIC: ldr x[[VARADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8] +; CHECK-FAST-PIC: ldr {{w[0-9]+}}, [x[[VARADDR]]] +} + +define i16 @test_i16(i16 %new) { + %val = load i16* @var16, align 2 + store i16 %new, i16* @var16 + ret i16 %val +; CHECK-LABEL: test_i16: +; CHECK: adrp x[[HIREG:[0-9]+]], var16 +; CHECK: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16] +; CHECK: strh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var16 +; CHECK-FAST: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16] +} + +define i32 @test_i32(i32 %new) { + %val = load i32* @var32, align 4 + store i32 %new, i32* @var32 + ret i32 %val +; CHECK-LABEL: test_i32: +; CHECK: adrp x[[HIREG:[0-9]+]], var32 +; CHECK: ldr {{w[0-9]+}}, [x[[HIREG]], :lo12:var32] +; CHECK: str {{w[0-9]+}}, [x[[HIREG]], :lo12:var32] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var32 +; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var32 +} + +define i64 @test_i64(i64 %new) { + %val = load i64* @var64, align 8 + store i64 %new, i64* @var64 + ret i64 %val +; CHECK-LABEL: test_i64: +; CHECK: adrp x[[HIREG:[0-9]+]], var64 +; CHECK: ldr {{x[0-9]+}}, [x[[HIREG]], :lo12:var64] +; CHECK: str {{x[0-9]+}}, [x[[HIREG]], :lo12:var64] + +; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var64 +; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var64 +} + +define i64* @test_addr() { + ret i64* @var64 +; CHECK-LABEL: test_addr: +; CHECK: adrp [[HIREG:x[0-9]+]], var64 +; CHECK: add x0, [[HIREG]], :lo12:var64 + +; CHECK-FAST: adrp [[HIREG:x[0-9]+]], var64 +; CHECK-FAST: add x0, [[HIREG]], :lo12:var64 +} + +@hiddenvar = hidden global i32 0, align 4 +@protectedvar = protected global i32 0, align 4 + +define i32 @test_vis() { + %lhs = load i32* @hiddenvar, align 4 + %rhs = load i32* @protectedvar, align 4 + %ret = add i32 %lhs, %rhs + ret i32 %ret +; CHECK-PIC: adrp {{x[0-9]+}}, hiddenvar +; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:hiddenvar] +; CHECK-PIC: adrp {{x[0-9]+}}, protectedvar +; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, 
:lo12:protectedvar] +} + +@var_default = external global [2 x i32] + +define i32 @test_default_align() { + %addr = getelementptr [2 x i32]* @var_default, i32 0, i32 0 + %val = load i32* %addr + ret i32 %val +; CHECK-LABEL: test_default_align: +; CHECK: adrp x[[HIREG:[0-9]+]], var_default +; CHECK: ldr w0, [x[[HIREG]], :lo12:var_default] +} + +define i64 @test_default_unaligned() { + %addr = bitcast [2 x i32]* @var_default to i64* + %val = load i64* %addr + ret i64 %val +; CHECK-LABEL: test_default_unaligned: +; CHECK: adrp [[HIREG:x[0-9]+]], var_default +; CHECK: add x[[ADDR:[0-9]+]], [[HIREG]], :lo12:var_default +; CHECK: ldr x0, [x[[ADDR]]] +} diff --git a/test/CodeGen/ARM64/ext.ll b/test/CodeGen/ARM64/ext.ll new file mode 100644 index 0000000000..57d6e0c67b --- /dev/null +++ b/test/CodeGen/ARM64/ext.ll @@ -0,0 +1,101 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: test_vextd: +;CHECK: {{ext.8b.*#3}} + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + ret <8 x i8> %tmp3 +} + +define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: test_vextRd: +;CHECK: {{ext.8b.*#5}} + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: test_vextq: +;CHECK: {{ext.16b.*3}} + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> + ret <16 x i8> %tmp3 +} + +define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: test_vextRq: +;CHECK: {{ext.16b.*7}} + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> + ret <16 x i8> %tmp3 +} + +define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: test_vextd16: +;CHECK: {{ext.8b.*#6}} + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + ret <4 x i16> %tmp3 +} + +define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: test_vextq32: +;CHECK: {{ext.16b.*12}} + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> + ret <4 x i32> %tmp3 +} + +; Undef shuffle indices should not prevent matching to VEXT: + +define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: test_vextd_undef: +;CHECK: {{ext.8b.*}} + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + ret <8 x i8> %tmp3 +} + +define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: test_vextRq_undef: +;CHECK: {{ext.16b.*#7}} + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> + ret <16 x i8> %tmp3 +} + +; Tests for ReconstructShuffle function. Indices have to be carefully +; chosen to reach lowering phase as a BUILD_VECTOR. + +; One vector needs vext, the other can be handled by extract_subvector +; Also checks interleaving of sources is handled correctly. +; Essence: a vext is used on %A and something saner than stack load/store for final result. 
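+; The checks below only look for the ext.8b and the zip1.4h that this lowering
+; is expected to produce.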
+define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: test_interleaved: +;CHECK: ext.8b +;CHECK: zip1.4h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> + ret <4 x i16> %tmp3 +} + +; An undef in the shuffle list should still be optimizable +define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: test_undef: +;CHECK: zip1.4h + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> + ret <4 x i16> %tmp3 +} diff --git a/test/CodeGen/ARM64/extend-int-to-fp.ll b/test/CodeGen/ARM64/extend-int-to-fp.ll new file mode 100644 index 0000000000..599a697a31 --- /dev/null +++ b/test/CodeGen/ARM64/extend-int-to-fp.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s + +define <4 x float> @foo(<4 x i16> %a) nounwind { +; CHECK-LABEL: foo: +; CHECK: ushll.4s v0, v0, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: ret + %vcvt.i = uitofp <4 x i16> %a to <4 x float> + ret <4 x float> %vcvt.i +} + +define <4 x float> @bar(<4 x i16> %a) nounwind { +; CHECK-LABEL: bar: +; CHECK: sshll.4s v0, v0, #0 +; CHECK-NEXT: scvtf.4s v0, v0 +; CHECK-NEXT: ret + %vcvt.i = sitofp <4 x i16> %a to <4 x float> + ret <4 x float> %vcvt.i +} diff --git a/test/CodeGen/ARM64/extend.ll b/test/CodeGen/ARM64/extend.ll new file mode 100644 index 0000000000..4d20543671 --- /dev/null +++ b/test/CodeGen/ARM64/extend.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s +@array = external global [0 x i32] + +define i64 @foo(i32 %i) { +; CHECK: foo +; CHECK: adrp x[[REG:[0-9]+]], _array@GOTPAGE +; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _array@GOTPAGEOFF] +; CHECK: ldrsw x0, [x[[REG1]], x0, sxtw #2] +; CHECK: ret + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds [0 x i32]* @array, i64 0, i64 %idxprom + %tmp1 = load i32* %arrayidx, align 4 + %conv = sext i32 %tmp1 to i64 + ret i64 %conv +} diff --git a/test/CodeGen/ARM64/extload-knownzero.ll b/test/CodeGen/ARM64/extload-knownzero.ll new file mode 100644 index 0000000000..14e5fd310d --- /dev/null +++ b/test/CodeGen/ARM64/extload-knownzero.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s +; rdar://12771555 + +define void @foo(i16* %ptr, i32 %a) nounwind { +entry: +; CHECK-LABEL: foo: + %tmp1 = icmp ult i32 %a, 100 + br i1 %tmp1, label %bb1, label %bb2 +bb1: +; CHECK: %bb1 +; CHECK: ldrh [[REG:w[0-9]+]] + %tmp2 = load i16* %ptr, align 2 + br label %bb2 +bb2: +; CHECK: %bb2 +; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff +; CHECK: cmp [[REG]], #23 + %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ] + %cmp = icmp ult i16 %tmp3, 24 + br i1 %cmp, label %bb3, label %exit +bb3: + call void @bar() nounwind + br label %exit +exit: + ret void +} + +declare void @bar () diff --git a/test/CodeGen/ARM64/extract.ll b/test/CodeGen/ARM64/extract.ll new file mode 100644 index 0000000000..119751c99e --- /dev/null +++ b/test/CodeGen/ARM64/extract.ll @@ -0,0 +1,58 @@ +; RUN: llc -arm64-extr-generation=true -verify-machineinstrs < %s \ +; RUN: -march=arm64 | FileCheck %s + +define i64 @ror_i64(i64 %in) { +; CHECK-LABEL: ror_i64: + %left = shl i64 %in, 19 + %right = lshr i64 %in, 45 + %val5 = or i64 %left, %right +; CHECK: extr {{x[0-9]+}}, x0, x0, #45 + ret i64 %val5 +} + +define i32 @ror_i32(i32 %in) { +; CHECK-LABEL: ror_i32: + %left = shl i32 %in, 9 + %right = lshr i32 %in, 23 + %val5 = or i32 %left, %right +; 
CHECK: extr {{w[0-9]+}}, w0, w0, #23
+ ret i32 %val5
+}
+
+define i32 @extr_i32(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: extr_i32:
+ %left = shl i32 %lhs, 6
+ %right = lshr i32 %rhs, 26
+ %val = or i32 %left, %right
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than w0 and w1.
+; CHECK: extr {{w[0-9]+}}, w0, w1, #26
+
+ ret i32 %val
+}
+
+define i64 @extr_i64(i64 %lhs, i64 %rhs) {
+; CHECK-LABEL: extr_i64:
+ %right = lshr i64 %rhs, 40
+ %left = shl i64 %lhs, 24
+ %val = or i64 %right, %left
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than w0 and w1.
+; CHECK: extr {{x[0-9]+}}, x0, x1, #40
+
+ ret i64 %val
+}
+
+; Regression test: a bad experimental pattern crept into git which optimised
+; this pattern to a single EXTR.
+define i32 @extr_regress(i32 %a, i32 %b) {
+; CHECK-LABEL: extr_regress:
+
+ %sh1 = shl i32 %a, 14
+ %sh2 = lshr i32 %b, 14
+ %val = or i32 %sh2, %sh1
+; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
+
+ ret i32 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/ARM64/extract_subvector.ll b/test/CodeGen/ARM64/extract_subvector.ll
new file mode 100644
index 0000000000..20c05fb232
--- /dev/null
+++ b/test/CodeGen/ARM64/extract_subvector.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
+
+; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8" insn.
+
+define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
+; CHECK: v8i8
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %ret
+}
+
+define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
+; CHECK-LABEL: v4i16:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i16> %ret
+}
+
+define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
+; CHECK-LABEL: v2i32:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+ ret <2 x i32> %ret
+}
+
+define <1 x i64> @v1i64(<2 x i64> %a) nounwind {
+; CHECK-LABEL: v1i64:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
+ ret <1 x i64> %ret
+}
+
+define <2 x float> @v2f32(<4 x float> %a) nounwind {
+; CHECK-LABEL: v2f32:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %ret
+}
+
+define <1 x double> @v1f64(<2 x double> %a) nounwind {
+; CHECK-LABEL: v1f64:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <2 x double> %a, <2 x double> %a, <1 x i32> <i32 1>
+ ret <1 x double> %ret
+}
diff --git a/test/CodeGen/ARM64/fast-isel-addr-offset.ll b/test/CodeGen/ARM64/fast-isel-addr-offset.ll
new file mode 100644
index 0000000000..a4326dc2b8
--- /dev/null
+++ b/test/CodeGen/ARM64/fast-isel-addr-offset.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+@sortlist = common global [5001 x i32] zeroinitializer, align 16
+@sortlist2 = common global [5001 x i64] zeroinitializer, align 16
+
+; Load an address with an offset larger than an LDR immediate can handle
+define i32 @foo() nounwind {
+entry:
+; CHECK: @foo
+; CHECK: adrp x[[REG:[0-9]+]], _sortlist@GOTPAGE
+; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist@GOTPAGEOFF]
+; CHECK: movz x[[REG2:[0-9]+]], #20000
+; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
+; CHECK: ldr w0, [x[[REG3]]]
+; CHECK: ret
+ %0 =
load i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32 0, i64 5000), align 4 + ret i32 %0 +} + +define i64 @foo2() nounwind { +entry: +; CHECK: @foo2 +; CHECK: adrp x[[REG:[0-9]+]], _sortlist2@GOTPAGE +; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist2@GOTPAGEOFF] +; CHECK: movz x[[REG2:[0-9]+]], #40000 +; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]] +; CHECK: ldr x0, [x[[REG3]]] +; CHECK: ret + %0 = load i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32 0, i64 5000), align 4 + ret i64 %0 +} + +; Load an address with a ridiculously large offset. +; rdar://12505553 +@pd2 = common global i8* null, align 8 + +define signext i8 @foo3() nounwind ssp { +entry: +; CHECK: @foo3 +; CHECK: movz x[[REG:[0-9]+]], #2874, lsl #32 +; CHECK: movk x[[REG]], #29646, lsl #16 +; CHECK: movk x[[REG]], #12274 + %0 = load i8** @pd2, align 8 + %arrayidx = getelementptr inbounds i8* %0, i64 12345678901234 + %1 = load i8* %arrayidx, align 1 + ret i8 %1 +} diff --git a/test/CodeGen/ARM64/fast-isel-alloca.ll b/test/CodeGen/ARM64/fast-isel-alloca.ll new file mode 100644 index 0000000000..8bbee16232 --- /dev/null +++ b/test/CodeGen/ARM64/fast-isel-alloca.ll @@ -0,0 +1,24 @@ +; This test should cause the TargetMaterializeAlloca to be invoked +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s + +%struct.S1Ty = type { i64 } +%struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty } + +define void @takeS1(%struct.S1Ty* %V) nounwind { +entry: + %V.addr = alloca %struct.S1Ty*, align 8 + store %struct.S1Ty* %V, %struct.S1Ty** %V.addr, align 8 + ret void +} + +define void @main() nounwind { +entry: +; CHECK: main +; CHECK: mov x[[REG:[0-9]+]], sp +; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8 +; CHECK-NEXT: add x0, x[[REG]], x[[REG1]] + %E = alloca %struct.S2Ty, align 4 + %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1 + call void @takeS1(%struct.S1Ty* %B) + ret void +} diff --git a/test/CodeGen/ARM64/fast-isel-br.ll b/test/CodeGen/ARM64/fast-isel-br.ll new file mode 100644 index 0000000000..8fd32fdd35 --- /dev/null +++ b/test/CodeGen/ARM64/fast-isel-br.ll @@ -0,0 +1,155 @@ +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s + +define void @branch1() nounwind uwtable ssp { + %x = alloca i32, align 4 + store i32 0, i32* %x, align 4 + %1 = load i32* %x, align 4 + %2 = icmp ne i32 %1, 0 + br i1 %2, label %3, label %4 + +;