; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; Verifies that 256-bit integer shuffles which move whole 128-bit lanes are
; lowered to a single vperm2i128 with the expected immediate.
;
; Immediate layout: bits [1:0] choose the low 128-bit lane of the result and
; bits [5:4] choose the high lane; 0/1 = low/high lane of the first source,
; 2/3 = low/high lane of the second source.
;
; NOTE(review): the splat-of-1 operands of the adds below were reconstructed;
; any non-zero integer constant serves the same purpose. The shuffle masks
; follow directly from the immediates in the check lines.

; imm 0x11: both result lanes are the high lane of the first source.
; CHECK: vperm2i128 $17
define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
entry:
  ; add forces execution domain (integer), so the integer form of the
  ; permute is expected rather than a floating-point-domain one.
  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %shuffle
}

; imm 0x03: low lane = high lane of the second source,
;           high lane = low lane of the first source.
; CHECK: vperm2i128 $3
define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; add forces execution domain
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

; imm 0x31: low lane = high lane of the first source,
;           high lane = high lane of the second source.
; CHECK: vperm2i128 $49
define <8 x i32> @E3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; add forces execution domain
  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %shuffle
}

; imm 0x02: low lane = low lane of the second source,
;           high lane = low lane of the first source.
; CHECK: vperm2i128 $2
define <16 x i16> @E4(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
entry:
  ; add forces execution domain
  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}

; Same lane selection as @E4, but both inputs are loaded from memory; the
; "(%" in the pattern requires one source to appear as a memory operand,
; i.e. a load folded into the permute.
; CHECK: vperm2i128 $2, (%
define <16 x i16> @E5(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
entry:
  %c = load <16 x i16>* %a
  %d = load <16 x i16>* %b
  ; The add consumes %c, leaving %d as the value that can stay a plain load.
  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}