From 8e38e862667e89ac928b8f5e7ce0a91c84fae4bc Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 9 Jun 2014 22:00:52 +0000 Subject: [PPC64LE] Generate correct code for unaligned little-endian vector loads The code in PPCTargetLowering::PerformDAGCombine() that handles unaligned Altivec vector loads generates a lvsl followed by a vperm. As we've seen in numerous other places, the vperm instruction has a big-endian bias, and this is fixed for little endian by complementing the permute control vector and swapping the input operands. In this case the lvsl is providing the permute control vector. Rather than generating an lvsl and a complement operation, it is sufficient to generate an lvsr instruction instead. Thus for LE code generation we will generate an lvsr rather than an lvsl, and swap the other input arguments on the vperm. The existing test/CodeGen/PowerPC/vec_misalign.ll is updated to test the code generation for PPC64 and PPC64LE, in addition to the existing PPC32/G5 testing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210493 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/PowerPC/vec_misaligned.ll | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll index d7ed64a5b1..304a84d49a 100644 --- a/test/CodeGen/PowerPC/vec_misaligned.ll +++ b/test/CodeGen/PowerPC/vec_misaligned.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -march=ppc32 -mcpu=g5 +; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc-apple-darwin8" @@ -8,6 +10,8 @@ target triple = "powerpc-apple-darwin8" define void @foo(i32 %x, ...) { entry: +; CHECK: foo: +; CHECK-LE: foo: %x_addr = alloca i32 ; [#uses=1] %ap = alloca i8* ; [#uses=3] %ap.0 = alloca i8* ; [#uses=3] @@ -27,6 +31,10 @@ entry: %tmp8 = getelementptr %struct.u16qi* %tmp6, i32 0, i32 0 ; <<16 x i8>*> [#uses=1] %tmp9 = getelementptr %struct.u16qi* %tmp7, i32 0, i32 0 ; <<16 x i8>*> [#uses=1] %tmp10 = load <16 x i8>* %tmp9, align 4 ; <<16 x i8>> [#uses=1] +; CHECK: lvsl +; CHECK: vperm +; CHECK-LE: lvsr +; CHECK-LE: vperm store <16 x i8> %tmp10, <16 x i8>* %tmp8, align 4 br label %return -- cgit v1.2.3