Diffstat (limited to 'test/CodeGen')
-rw-r--r--  test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll |   10
-rw-r--r--  test/CodeGen/ARM/call-tc.ll                           |   17
-rw-r--r--  test/CodeGen/ARM/fast-isel-GEP-coalesce.ll            |    4
-rw-r--r--  test/CodeGen/ARM/fast-isel-br-const.ll                |   12
-rw-r--r--  test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll   |    4
-rw-r--r--  test/CodeGen/ARM/fast-isel-call.ll                    |  159
-rw-r--r--  test/CodeGen/ARM/fast-isel-crash.ll                   |    2
-rw-r--r--  test/CodeGen/ARM/fast-isel-crash2.ll                  |    2
-rw-r--r--  test/CodeGen/ARM/fast-isel-deadcode.ll                |    2
-rw-r--r--  test/CodeGen/ARM/fast-isel-intrinsic.ll               |   72
-rw-r--r--  test/CodeGen/ARM/fast-isel-ldr-str-arm.ll             |    2
-rw-r--r--  test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll           |    2
-rw-r--r--  test/CodeGen/ARM/fast-isel-mvn.ll                     |    4
-rw-r--r--  test/CodeGen/ARM/fast-isel-pic.ll                     |    8
-rw-r--r--  test/CodeGen/ARM/fast-isel-redefinition.ll            |    2
-rw-r--r--  test/CodeGen/ARM/fast-isel-select.ll                  |    2
-rw-r--r--  test/CodeGen/ARM/fast-isel-static.ll                  |    6
-rw-r--r--  test/CodeGen/ARM/fast-isel.ll                         |   12
-rw-r--r--  test/CodeGen/ARM/vmul.ll                              |   24
-rw-r--r--  test/CodeGen/Hexagon/BranchPredict.ll                 |   79
-rw-r--r--  test/CodeGen/Hexagon/args.ll                          |    9
-rw-r--r--  test/CodeGen/Hexagon/extload-combine.ll               |   80
-rw-r--r--  test/CodeGen/Hexagon/packetize_cond_inst.ll           |   32
-rw-r--r--  test/CodeGen/Hexagon/tfr-to-combine.ll                |   35
-rw-r--r--  test/CodeGen/Mips/hf16call32.ll                       | 1028
-rw-r--r--  test/CodeGen/Mips/mno-ldc1-sdc1.ll                    |   45
-rw-r--r--  test/CodeGen/Mips/stackcoloring.ll                    |   39
-rw-r--r--  test/CodeGen/PowerPC/addrfuncstr.ll                   |   27
-rw-r--r--  test/CodeGen/PowerPC/crsave.ll                        |   12
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.imax.ll                 |   21
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.imin.ll                 |   21
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.trunc.ll                |   22
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.umax.ll                 |   21
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.umin.ll                 |   21
-rw-r--r--  test/CodeGen/R600/uitofp.ll                           |   16
-rw-r--r--  test/CodeGen/SPARC/64cond.ll                          |    4
-rw-r--r--  test/CodeGen/SystemZ/int-sub-07.ll                    |  131
-rw-r--r--  test/CodeGen/Thumb2/large-call.ll                     |    2
-rw-r--r--  test/CodeGen/X86/x86-64-psub.ll                       |  213
39 files changed, 2019 insertions(+), 185 deletions(-)
diff --git a/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll b/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll
new file mode 100644
index 0000000000..8f6709ec5e
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-13-DAGCombiner-undef-mask.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s
+target triple = "armv7-none-linux-gnueabi"
+
+define <3 x i64> @shuffle(i1 %dec1, i1 %dec0, <3 x i64> %b) {
+entry:
+ %.sink = select i1 %dec1, <3 x i64> %b, <3 x i64> zeroinitializer
+ %.sink15 = select i1 %dec0, <3 x i64> %b, <3 x i64> zeroinitializer
+ %vecinit7 = shufflevector <3 x i64> %.sink, <3 x i64> %.sink15, <3 x i32> <i32 0, i32 4, i32 undef>
+ ret <3 x i64> %vecinit7
+}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index 58fbbda0f6..c7e17ea353 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -162,3 +162,20 @@ define i32 @t9() nounwind {
declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
+
+; rdar://13827621
+; Correctly preserve the input chain for the tailcall node in the bitcast case,
+; otherwise the call to floorf is lost.
+define float @libcall_tc_test2(float* nocapture %a, float %b) {
+; CHECKT2D: libcall_tc_test2:
+; CHECKT2D: blx _floorf
+; CHECKT2D: b.w _truncf
+ %1 = load float* %a, align 4
+ %call = tail call float @floorf(float %1)
+ store float %call, float* %a, align 4
+ %call1 = tail call float @truncf(float %b)
+ ret float %call1
+}
+
+declare float @floorf(float) readnone
+declare float @truncf(float) readnone
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
index 60bc6a62f5..28a84e3bf9 100644
--- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -26,8 +26,8 @@ entry:
; THUMB: t2
%addr = alloca i32*, align 4
store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
-; ARM: movw r1, #1148
-; ARM: add r0, r0, r1
+; ARM: movw [[R:r[0-9]+]], #1148
+; ARM: add r0, r{{[0-9]+}}, [[R]]
; THUMB: addw r0, r0, #1148
%0 = load i32** %addr, align 4
ret i32* %0
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index 4e6efd2489..aefe200dc7 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -7,8 +7,8 @@ entry:
; ARM: t1:
%x = add i32 %a, %b
br i1 1, label %if.then, label %if.else
-; THUMB-NOT: b LBB0_1
-; ARM-NOT: b LBB0_1
+; THUMB-NOT: b {{\.?}}LBB0_1
+; ARM-NOT: b {{\.?}}LBB0_1
if.then: ; preds = %entry
call void @foo1()
@@ -16,8 +16,8 @@ if.then: ; preds = %entry
if.else: ; preds = %entry
br i1 0, label %if.then2, label %if.else3
-; THUMB: b LBB0_4
-; ARM: b LBB0_4
+; THUMB: b {{\.?}}LBB0_4
+; ARM: b {{\.?}}LBB0_4
if.then2: ; preds = %if.else
call void @foo2()
@@ -26,8 +26,8 @@ if.then2: ; preds = %if.else
if.else3: ; preds = %if.else
%y = sub i32 %a, %b
br i1 1, label %if.then5, label %if.end
-; THUMB-NOT: b LBB0_5
-; ARM-NOT: b LBB0_5
+; THUMB-NOT: b {{\.?}}LBB0_5
+; ARM-NOT: b {{\.?}}LBB0_5
if.then5: ; preds = %if.else3
call void @foo1()
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
index b6f201728c..46d5f997c6 100644
--- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Fast-isel can't handle non-double multi-reg retvals.
; This test just checks to make sure we don't hit the assert in FinishCall.
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index b6c9098613..6ee2c349ab 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -2,8 +2,12 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
+
+; Note that some of these tests assume that relocations are either
+; movw/movt pairs or constant pool loads; different platforms will
+; select different approaches. (A sketch of the FileCheck idiom used
+; to accept both follows this file's diff.)
define i32 @t0(i1 zeroext %a) nounwind {
%1 = zext i1 %a to i32
@@ -88,53 +92,53 @@ declare zeroext i1 @t9();
define i32 @t10(i32 %argc, i8** nocapture %argv) {
entry:
; ARM: @t10
-; ARM: movw r0, #0
-; ARM: movw r1, #248
-; ARM: movw r2, #187
-; ARM: movw r3, #28
-; ARM: movw r9, #40
-; ARM: movw r12, #186
-; ARM: uxtb r0, r0
-; ARM: uxtb r1, r1
-; ARM: uxtb r2, r2
-; ARM: uxtb r3, r3
-; ARM: uxtb r9, r9
-; ARM: str r9, [sp]
-; ARM: uxtb r9, r12
-; ARM: str r9, [sp, #4]
-; ARM: bl _bar
+; ARM: movw [[R0:l?r[0-9]*]], #0
+; ARM: movw [[R1:l?r[0-9]*]], #248
+; ARM: movw [[R2:l?r[0-9]*]], #187
+; ARM: movw [[R3:l?r[0-9]*]], #28
+; ARM: movw [[R4:l?r[0-9]*]], #40
+; ARM: movw [[R5:l?r[0-9]*]], #186
+; ARM: uxtb [[R0]], [[R0]]
+; ARM: uxtb [[R1]], [[R1]]
+; ARM: uxtb [[R2]], [[R2]]
+; ARM: uxtb [[R3]], [[R3]]
+; ARM: uxtb [[R4]], [[R4]]
+; ARM: str [[R4]], [sp]
+; ARM: uxtb [[R4]], [[R5]]
+; ARM: str [[R4]], [sp, #4]
+; ARM: bl {{_?}}bar
; ARM-LONG: @t10
-; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
-; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
-; ARM-LONG: ldr lr, [lr]
-; ARM-LONG: blx lr
+; ARM-LONG: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; ARM-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
+; ARM-LONG: ldr [[R]], {{\[}}[[R]]{{\]}}
+; ARM-LONG: blx [[R]]
; THUMB: @t10
-; THUMB: movs r0, #0
-; THUMB: movt r0, #0
-; THUMB: movs r1, #248
-; THUMB: movt r1, #0
-; THUMB: movs r2, #187
-; THUMB: movt r2, #0
-; THUMB: movs r3, #28
-; THUMB: movt r3, #0
-; THUMB: movw r9, #40
-; THUMB: movt r9, #0
-; THUMB: movw r12, #186
-; THUMB: movt r12, #0
-; THUMB: uxtb r0, r0
-; THUMB: uxtb r1, r1
-; THUMB: uxtb r2, r2
-; THUMB: uxtb r3, r3
-; THUMB: uxtb.w r9, r9
-; THUMB: str.w r9, [sp]
-; THUMB: uxtb.w r9, r12
-; THUMB: str.w r9, [sp, #4]
-; THUMB: bl _bar
+; THUMB: movs [[R0:l?r[0-9]*]], #0
+; THUMB: movt [[R0]], #0
+; THUMB: movs [[R1:l?r[0-9]*]], #248
+; THUMB: movt [[R1]], #0
+; THUMB: movs [[R2:l?r[0-9]*]], #187
+; THUMB: movt [[R2]], #0
+; THUMB: movs [[R3:l?r[0-9]*]], #28
+; THUMB: movt [[R3]], #0
+; THUMB: movw [[R4:l?r[0-9]*]], #40
+; THUMB: movt [[R4]], #0
+; THUMB: movw [[R5:l?r[0-9]*]], #186
+; THUMB: movt [[R5]], #0
+; THUMB: uxtb [[R0]], [[R0]]
+; THUMB: uxtb [[R1]], [[R1]]
+; THUMB: uxtb [[R2]], [[R2]]
+; THUMB: uxtb [[R3]], [[R3]]
+; THUMB: uxtb.w [[R4]], [[R4]]
+; THUMB: str.w [[R4]], [sp]
+; THUMB: uxtb.w [[R4]], [[R5]]
+; THUMB: str.w [[R4]], [sp, #4]
+; THUMB: bl {{_?}}bar
; THUMB-LONG: @t10
-; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
-; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
-; THUMB-LONG: ldr.w lr, [lr]
-; THUMB-LONG: blx lr
+; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
+; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}}
+; THUMB-LONG: blx [[R]]
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
ret i32 0
}
@@ -147,12 +151,12 @@ define i32 @bar0(i32 %i) nounwind {
define void @foo3() uwtable {
; ARM: movw r0, #0
-; ARM: movw r1, :lower16:_bar0
-; ARM: movt r1, :upper16:_bar0
+; ARM: {{(movw r1, :lower16:_?bar0)|(ldr r1, .LCPI)}}
+; ARM: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}}
; ARM: blx r1
; THUMB: movs r0, #0
-; THUMB: movw r1, :lower16:_bar0
-; THUMB: movt r1, :upper16:_bar0
+; THUMB: {{(movw r1, :lower16:_?bar0)|(ldr.n r1, .LCPI)}}
+; THUMB: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}}
; THUMB: blx r1
%fptr = alloca i32 (i32)*, align 8
store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
@@ -164,66 +168,23 @@ define void @foo3() uwtable {
define i32 @LibCall(i32 %a, i32 %b) {
entry:
; ARM: LibCall
-; ARM: bl ___udivsi3
+; ARM: bl {{___udivsi3|__aeabi_uidiv}}
; ARM-LONG: LibCall
-; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
-; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr r2, .LCPI)}}
+; ARM-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
; ARM-LONG: ldr r2, [r2]
; ARM-LONG: blx r2
; THUMB: LibCall
-; THUMB: bl ___udivsi3
+; THUMB: bl {{___udivsi3|__aeabi_uidiv}}
; THUMB-LONG: LibCall
-; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
-; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr.n r2, .LCPI)}}
+; THUMB-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}}
; THUMB-LONG: ldr r2, [r2]
; THUMB-LONG: blx r2
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
-define i32 @VarArg() nounwind {
-entry:
- %i = alloca i32, align 4
- %j = alloca i32, align 4
- %k = alloca i32, align 4
- %m = alloca i32, align 4
- %n = alloca i32, align 4
- %tmp = alloca i32, align 4
- %0 = load i32* %i, align 4
- %1 = load i32* %j, align 4
- %2 = load i32* %k, align 4
- %3 = load i32* %m, align 4
- %4 = load i32* %n, align 4
-; ARM: VarArg
-; ARM: mov r7, sp
-; ARM: movw r0, #5
-; ARM: ldr r1, [r7, #-4]
-; ARM: ldr r2, [r7, #-8]
-; ARM: ldr r3, [r7, #-12]
-; ARM: ldr r9, [sp, #16]
-; ARM: ldr r12, [sp, #12]
-; ARM: str r9, [sp]
-; ARM: str r12, [sp, #4]
-; ARM: bl _CallVariadic
-; THUMB: mov r7, sp
-; THUMB: movs r0, #5
-; THUMB: movt r0, #0
-; THUMB: ldr r1, [sp, #28]
-; THUMB: ldr r2, [sp, #24]
-; THUMB: ldr r3, [sp, #20]
-; THUMB: ldr.w r9, [sp, #16]
-; THUMB: ldr.w r12, [sp, #12]
-; THUMB: str.w r9, [sp]
-; THUMB: str.w r12, [sp, #4]
-; THUMB: bl _CallVariadic
- %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
- store i32 %call, i32* %tmp, align 4
- %5 = load i32* %tmp, align 4
- ret i32 %5
-}
-
-declare i32 @CallVariadic(i32, ...)
-
; Test fastcc
define fastcc void @fast_callee(float %i) ssp {
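A minimal sketch of the FileCheck idiom used throughout the updated checks above (the symbol and register names here are illustrative, not from the commit): regex alternation accepts either the movw/movt relocation form or the constant-pool form, [[R:...]] captures whichever register the compiler picked so later lines can reuse it, and a fully optional {{(...)?}} pattern tolerates the movt that only the movw/movt form emits.

; Darwin-style materialization of a global's address:
;   movw r0, :lower16:L_foo$non_lazy_ptr
;   movt r0, :upper16:L_foo$non_lazy_ptr
; ELF-style materialization of the same address:
;   ldr r0, .LCPI0_0
; One set of checks covering both forms:
; CHECK: {{(movw)|(ldr)}} [[R:r[0-9]+]], {{(:lower16:L_foo\$non_lazy_ptr)|(.LCPI)}}
; CHECK: {{(movt [[R]], :upper16:L_foo\$non_lazy_ptr)?}}
; CHECK: ldr [[R]], {{\[}}[[R]]{{\]}}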
diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll
index 8fb4b66b7d..7d45feff69 100644
--- a/test/CodeGen/ARM/fast-isel-crash.ll
+++ b/test/CodeGen/ARM/fast-isel-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin
%union.anon = type { <16 x i32> }
diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll
index f245168a8e..8867f87065 100644
--- a/test/CodeGen/ARM/fast-isel-crash2.ll
+++ b/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin
; rdar://9515076
; (Make sure this doesn't crash.)
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 3a943d854b..5e6666c47d 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; The target-specific selector can't properly handle the double because it isn't
; being passed via a register, so the materialized arguments become dead code.
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 48105dd389..bc9769a537 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -3,33 +3,37 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
+; Note that some of these tests assume that relocations are either
+; movw/movt or constant pool loads. Different platforms will select
+; different approaches.
+
@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
@temp = common global [60 x i8] zeroinitializer, align 1
define void @t1() nounwind ssp {
; ARM: t1
-; ARM: movw r0, :lower16:_message1
-; ARM: movt r0, :upper16:_message1
+; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
; ARM: add r0, r0, #5
; ARM: movw r1, #64
; ARM: movw r2, #10
; ARM: uxtb r1, r1
-; ARM: bl _memset
+; ARM: bl {{_?}}memset
; ARM-LONG: t1
; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
; ARM-LONG: ldr r3, [r3]
; ARM-LONG: blx r3
; THUMB: t1
-; THUMB: movw r0, :lower16:_message1
-; THUMB: movt r0, :upper16:_message1
+; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
; THUMB: adds r0, #5
; THUMB: movs r1, #64
; THUMB: movt r1, #0
; THUMB: movs r2, #10
; THUMB: movt r2, #0
; THUMB: uxtb r1, r1
-; THUMB: bl _memset
+; THUMB: bl {{_?}}memset
; THUMB-LONG: t1
; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
@@ -43,31 +47,33 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
define void @t2() nounwind ssp {
; ARM: t2
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #17
-; ARM: str r0, [sp] @ 4-byte Spill
+; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
; ARM: mov r0, r1
-; ARM: ldr r1, [sp] @ 4-byte Reload
-; ARM: bl _memcpy
+; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
+; ARM: bl {{_?}}memcpy
; ARM-LONG: t2
; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
; ARM-LONG: ldr r3, [r3]
; ARM-LONG: blx r3
; THUMB: t2
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #17
; THUMB: movt r2, #0
+; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
; THUMB: mov r0, r1
-; THUMB: bl _memcpy
+; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
+; THUMB: bl {{_?}}memcpy
; THUMB-LONG: t2
; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
@@ -81,29 +87,31 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
define void @t3() nounwind ssp {
; ARM: t3
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM: ldr r0, [r0]
; ARM: add r1, r0, #4
; ARM: add r0, r0, #16
; ARM: movw r2, #10
; ARM: mov r0, r1
-; ARM: bl _memmove
+; ARM: bl {{_?}}memmove
; ARM-LONG: t3
; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
; ARM-LONG: ldr r3, [r3]
; ARM-LONG: blx r3
; THUMB: t3
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr r0, [r0]
; THUMB: adds r1, r0, #4
; THUMB: adds r0, #16
; THUMB: movs r2, #10
; THUMB: movt r2, #0
+; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
; THUMB: mov r0, r1
-; THUMB: bl _memmove
+; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
+; THUMB: bl {{_?}}memmove
; THUMB-LONG: t3
; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
@@ -115,8 +123,8 @@ define void @t3() nounwind ssp {
define void @t4() nounwind ssp {
; ARM: t4
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM: ldr r0, [r0]
; ARM: ldr r1, [r0, #16]
; ARM: str r1, [r0, #4]
@@ -126,8 +134,8 @@ define void @t4() nounwind ssp {
; ARM: strh r1, [r0, #12]
; ARM: bx lr
; THUMB: t4
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr r0, [r0]
; THUMB: ldr r1, [r0, #16]
; THUMB: str r1, [r0, #4]
@@ -144,8 +152,8 @@ declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
define void @t5() nounwind ssp {
; ARM: t5
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM: ldr r0, [r0]
; ARM: ldrh r1, [r0, #16]
; ARM: strh r1, [r0, #4]
@@ -159,8 +167,8 @@ define void @t5() nounwind ssp {
; ARM: strh r1, [r0, #12]
; ARM: bx lr
; THUMB: t5
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr r0, [r0]
; THUMB: ldrh r1, [r0, #16]
; THUMB: strh r1, [r0, #4]
@@ -179,8 +187,8 @@ define void @t5() nounwind ssp {
define void @t6() nounwind ssp {
; ARM: t6
-; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
-; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; ARM: ldr r0, [r0]
; ARM: ldrb r1, [r0, #16]
; ARM: strb r1, [r0, #4]
@@ -204,8 +212,8 @@ define void @t6() nounwind ssp {
; ARM: strb r1, [r0, #13]
; ARM: bx lr
; THUMB: t6
-; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
; THUMB: ldr r0, [r0]
; THUMB: ldrb r1, [r0, #16]
; THUMB: strb r1, [r0, #4]
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
index dfb8c53735..cf294bcfbe 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
define i32 @t1(i32* nocapture %ptr) nounwind readonly {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index 0b5267ddc9..0e71322d4e 100644
--- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; rdar://10418009
define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index b180e439dd..328168a84f 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; rdar://10412592
; Note: The Thumb code is being generated by the target-independent selector.
diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll
index 867d53f973..6bb9ea3a8c 100644
--- a/test/CodeGen/ARM/fast-isel-pic.ll
+++ b/test/CodeGen/ARM/fast-isel-pic.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
@g = global i32 0, align 4
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 563880dab0..ee150facac 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -verify-machineinstrs -optimize-regalloc -regalloc=basic < %s
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic < %s
; This isn't exactly a useful set of command-line options, but check that it
; doesn't crash. (It was crashing because a register was getting redefined.)
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index b83a733669..a937036284 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i1 %c) nounwind readnone {
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index e8759a7fc4..afdfa84f39 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static | FileCheck -check-prefix=NORM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
define void @myadd(float* %sum, float* %addend) nounwind {
entry:
@@ -24,7 +24,7 @@ entry:
store float 0.000000e+00, float* %ztot, align 4
store float 1.000000e+00, float* %z, align 4
; CHECK-LONG: blx r
-; CHECK-NORM: bl _myadd
+; CHECK-NORM: bl {{_?}}myadd
call void @myadd(float* %ztot, float* %z)
ret i32 0
}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 41fda41326..39ffcac292 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -144,15 +144,19 @@ define void @test4() {
store i32 %b, i32* @test4g
ret void
-; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
-; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
+
+; Note that relocations are either movw/movt or constant pool
+; loads. Different platforms will select different approaches.
+
+; THUMB: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
+; THUMB: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}}
; THUMB: ldr r0, [r0]
; THUMB: ldr r1, [r0]
; THUMB: adds r1, #1
; THUMB: str r1, [r0]
-; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
-; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
+; ARM: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr r0, .LCPI)}}
+; ARM: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}}
; ARM: ldr r0, [r0]
; ARM: ldr r1, [r0]
; ARM: add r1, r1, #1
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 74628f0c5c..eb5ad8f0c3 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -599,3 +599,27 @@ for.end179: ; preds = %for.cond.loopexit,
declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+
+; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8),
+; creating an illegal type during legalization and causing an assert.
+; PR15970
+define void @no_illegal_types_vmull_sext(<4 x i32> %a) {
+entry:
+ %wide.load283.i = load <4 x i8>* undef, align 1
+ %0 = sext <4 x i8> %wide.load283.i to <4 x i32>
+ %1 = sub nsw <4 x i32> %0, %a
+ %2 = mul nsw <4 x i32> %1, %1
+ %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
+ store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
+ ret void
+}
+define void @no_illegal_types_vmull_zext(<4 x i32> %a) {
+entry:
+ %wide.load283.i = load <4 x i8>* undef, align 1
+ %0 = zext <4 x i8> %wide.load283.i to <4 x i32>
+ %1 = sub nsw <4 x i32> %0, %a
+ %2 = mul nsw <4 x i32> %1, %1
+ %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
+ store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/BranchPredict.ll b/test/CodeGen/Hexagon/BranchPredict.ll
new file mode 100644
index 0000000000..716e85da5a
--- /dev/null
+++ b/test/CodeGen/Hexagon/BranchPredict.ll
@@ -0,0 +1,79 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that the branch probabilities are reflected in the instructions:
+; the basic block placement pass should place the more probable successor
+; block as the fall-through block, and the jump to the less probable block
+; should then get the not-taken (":nt") hint. (A sketch of how the !prof
+; weights below map to successors follows this file's diff.)
+
+
+@j = external global i32
+
+define i32 @foo(i32 %a) nounwind {
+; CHECK: if{{ *}}(!p{{[0-3]}}.new) jump:nt
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.else, label %if.then, !prof !0
+
+if.then: ; preds = %entry
+ %add = add nsw i32 %a, 10
+ %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind
+ br label %return
+
+if.else: ; preds = %entry
+ %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 4) nounwind
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+ ret i32 %retval.0
+}
+
+declare i32 @foobar(...)
+
+define i32 @bar(i32 %a) nounwind {
+; CHECK: if{{ *}}(p{{[0-3]}}.new) jump:nt
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.else, label %if.then, !prof !1
+
+if.then: ; preds = %entry
+ %add = add nsw i32 %a, 10
+ %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind
+ br label %return
+
+if.else: ; preds = %entry
+ %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 4) nounwind
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+ ret i32 %retval.0
+}
+
+define i32 @foo_bar(i32 %a, i16 signext %b) nounwind {
+; CHECK: if{{ *}}(!cmp.eq(r{{[0-9]*}}.new, #0)) jump:nt
+entry:
+ %0 = load i32* @j, align 4, !tbaa !2
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.else, label %if.then, !prof !0
+
+if.then: ; preds = %entry
+ %add = add nsw i32 %a, 10
+ %call = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add) nounwind
+ br label %return
+
+if.else: ; preds = %entry
+ %add1 = add nsw i32 %a, 4
+ %call2 = tail call i32 bitcast (i32 (...)* @foobar to i32 (i32)*)(i32 %add1) nounwind
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %retval.0 = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+ ret i32 %retval.0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+!2 = metadata !{metadata !"int", metadata !3}
+!3 = metadata !{metadata !"omnipotent char", metadata !4}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
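In these !prof annotations, the i32 weights in a "branch_weights" node line up positionally with the branch's successor labels, so the first weight belongs to the first label. A minimal sketch of the convention (@sketch and its labels are illustrative, written in this era's metadata syntax):

define i32 @sketch(i1 %c) nounwind {
entry:
; The first successor, %hot, gets weight 64 and %cold gets weight 4,
; so %hot is expected roughly 16x as often as %cold.
  br i1 %c, label %hot, label %cold, !prof !5
hot:
  ret i32 0
cold:
  ret i32 1
}
!5 = metadata !{metadata !"branch_weights", i32 64, i32 4}

Under !0 above, %if.else is therefore the probable successor of @foo's entry branch, so it becomes the fall-through block and the conditional jump to %if.then gets the ":nt" hint.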
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index f8c9e44c83..aea4ffe2ee 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,11 +1,8 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s
; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7
-; CHECK: r0 = #1
-; CHECK: r1 = #2
-; CHECK: r2 = #3
-; CHECK: r3 = #4
-; CHECK: r4 = #5
-; CHECK: r5 = #6
+; CHECK: r1:0 = combine(#2, #1)
+; CHECK: r3:2 = combine(#4, #3)
+; CHECK: r5:4 = combine(#6, #5)
define void @foo() nounwind {
diff --git a/test/CodeGen/Hexagon/extload-combine.ll b/test/CodeGen/Hexagon/extload-combine.ll
new file mode 100644
index 0000000000..b3b8bf0703
--- /dev/null
+++ b/test/CodeGen/Hexagon/extload-combine.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+; Check that combine/sxtw instructions are generated.
+; In the combine case, one operand should be 0 and the other should be
+; the output of the absolute-addressing load instruction. (A sketch of
+; the expected sequence follows this file's diff.)
+
+@a = external global i16
+@b = external global i16
+@c = external global i16
+@char_a = external global i8
+@char_b = external global i8
+@char_c = external global i8
+@int_a = external global i32
+@int_b = external global i32
+@int_c = external global i32
+
+; Function Attrs: nounwind
+define i64 @short_test1() #0 {
+; CHECK: [[VAR:r[0-9]+]]{{ *}}={{ *}}memuh(##
+; CHECK: combine(#0, [[VAR]])
+entry:
+ store i16 0, i16* @a, align 2
+ %0 = load i16* @b, align 2
+ %conv2 = zext i16 %0 to i64
+ ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @short_test2() #0 {
+; CHECK: [[VAR1:r[0-9]+]]{{ *}}={{ *}}memh(##
+; CHECK: sxtw([[VAR1]])
+entry:
+ store i16 0, i16* @a, align 2
+ %0 = load i16* @c, align 2
+ %conv2 = sext i16 %0 to i64
+ ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @char_test1() #0 {
+; CHECK: [[VAR2:r[0-9]+]]{{ *}}={{ *}}memub(##
+; CHECK: combine(#0, [[VAR2]])
+entry:
+ store i8 0, i8* @char_a, align 1
+ %0 = load i8* @char_b, align 1
+ %conv2 = zext i8 %0 to i64
+ ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @char_test2() #0 {
+; CHECK: [[VAR3:r[0-9]+]]{{ *}}={{ *}}memb(##
+; CHECK: sxtw([[VAR3]])
+entry:
+ store i8 0, i8* @char_a, align 1
+ %0 = load i8* @char_c, align 1
+ %conv2 = sext i8 %0 to i64
+ ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @int_test1() #0 {
+; CHECK: [[VAR4:r[0-9]+]]{{ *}}={{ *}}memw(##
+; CHECK: combine(#0, [[VAR4]])
+entry:
+ store i32 0, i32* @int_a, align 4
+ %0 = load i32* @int_b, align 4
+ %conv = zext i32 %0 to i64
+ ret i64 %conv
+}
+
+; Function Attrs: nounwind
+define i64 @int_test2() #0 {
+; CHECK: [[VAR5:r[0-9]+]]{{ *}}={{ *}}memw(##
+; CHECK: sxtw([[VAR5]])
+entry:
+ store i32 0, i32* @int_a, align 4
+ %0 = load i32* @int_c, align 4
+ %conv = sext i32 %0 to i64
+ ret i64 %conv
+}
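For reference, a sketch of the lowering the zero-extend cases above expect (register numbers illustrative): the global is read with an absolute-addressed load, and the value is widened to 64 bits by pairing it with a #0 high word.

    r0 = memuh(##b)        // absolute-addressing load of the i16 global
    r1:0 = combine(#0, r0) // zero-extend to i64: high word is #0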
diff --git a/test/CodeGen/Hexagon/packetize_cond_inst.ll b/test/CodeGen/Hexagon/packetize_cond_inst.ll
new file mode 100644
index 0000000000..a48a9f62ec
--- /dev/null
+++ b/test/CodeGen/Hexagon/packetize_cond_inst.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mcpu=hexagonv4 -tail-dup-size=1 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon-unknown--elf"
+
+; Make sure we put the two conditionally executed adds in a packet.
+; ifcnv_add:
+; {
+; p0 = cmp.gt(r2, r1)
+; if (!p0.new) r0 = add(r2, r1)
+; if (p0.new) r0 = add(r0, #10)
+; }
+; CHECK: cmp
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+define i32 @ifcnv_add(i32, i32, i32) nounwind readnone {
+ %4 = icmp sgt i32 %2, %1
+ br i1 %4, label %5, label %7
+
+; <label>:5 ; preds = %3
+ %6 = add nsw i32 %0, 10
+ br label %9
+
+; <label>:7 ; preds = %3
+ %8 = add nsw i32 %2, %1
+ br label %9
+
+; <label>:9 ; preds = %7, %5
+ %10 = phi i32 [ %6, %5 ], [ %8, %7 ]
+ %11 = add nsw i32 %10, 1
+ ret i32 %11
+}
diff --git a/test/CodeGen/Hexagon/tfr-to-combine.ll b/test/CodeGen/Hexagon/tfr-to-combine.ll
new file mode 100644
index 0000000000..e3057cd161
--- /dev/null
+++ b/test/CodeGen/Hexagon/tfr-to-combine.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s
+
+; Check that we combine TFRs and TFRIs into COMBINEs.
+
+@a = external global i16
+@b = external global i16
+@c = external global i16
+
+; Function Attrs: nounwind
+define i64 @test1() #0 {
+; CHECK: combine(#10, #0)
+entry:
+ store i16 0, i16* @a, align 2
+ store i16 10, i16* @b, align 2
+ ret i64 10
+}
+
+; Function Attrs: nounwind
+define i64 @test2() #0 {
+; CHECK: combine(#0, r{{[0-9]+}})
+entry:
+ store i16 0, i16* @a, align 2
+ %0 = load i16* @c, align 2
+ %conv2 = zext i16 %0 to i64
+ ret i64 %conv2
+}
+
+; Function Attrs: nounwind
+define i64 @test4() #0 {
+; CHECK: combine(#0, ##100)
+entry:
+ store i16 100, i16* @b, align 2
+ store i16 0, i16* @a, align 2
+ ret i64 0
+}
diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll
new file mode 100644
index 0000000000..41249e1be2
--- /dev/null
+++ b/test/CodeGen/Mips/hf16call32.ll
@@ -0,0 +1,1028 @@
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel
+
+@x = common global float 0.000000e+00, align 4
+@y = common global float 0.000000e+00, align 4
+@xd = common global double 0.000000e+00, align 8
+@yd = common global double 0.000000e+00, align 8
+@xy = common global { float, float } zeroinitializer, align 4
+@xyd = common global { double, double } zeroinitializer, align 8
+@ret_sf = common global float 0.000000e+00, align 4
+@ret_df = common global double 0.000000e+00, align 8
+@ret_sc = common global { float, float } zeroinitializer, align 4
+@ret_dc = common global { double, double } zeroinitializer, align 8
+@lx = common global float 0.000000e+00, align 4
+@ly = common global float 0.000000e+00, align 4
+@lxd = common global double 0.000000e+00, align 8
+@lyd = common global double 0.000000e+00, align 8
+@lxy = common global { float, float } zeroinitializer, align 4
+@lxyd = common global { double, double } zeroinitializer, align 8
+@lret_sf = common global float 0.000000e+00, align 4
+@lret_df = common global double 0.000000e+00, align 8
+@lret_sc = common global { float, float } zeroinitializer, align 4
+@lret_dc = common global { double, double } zeroinitializer, align 8
+@.str = private unnamed_addr constant [10 x i8] c"%f %f %i\0A\00", align 1
+@.str1 = private unnamed_addr constant [16 x i8] c"%f=%f %f=%f %i\0A\00", align 1
+@.str2 = private unnamed_addr constant [22 x i8] c"%f=%f %f=%f %f=%f %i\0A\00", align 1
+@.str3 = private unnamed_addr constant [18 x i8] c"%f+%fi=%f+%fi %i\0A\00", align 1
+@.str4 = private unnamed_addr constant [24 x i8] c"%f+%fi=%f+%fi %f=%f %i\0A\00", align 1
+
+; Function Attrs: nounwind
+define void @clear() #0 {
+entry:
+ store float 1.000000e+00, float* @x, align 4
+ store float 1.000000e+00, float* @y, align 4
+ store double 1.000000e+00, double* @xd, align 8
+ store double 1.000000e+00, double* @yd, align 8
+ store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+ store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+ store float 1.000000e+00, float* @ret_sf, align 4
+ store double 1.000000e+00, double* @ret_df, align 8
+ store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ store float 0.000000e+00, float* @lx, align 4
+ store float 0.000000e+00, float* @ly, align 4
+ store double 0.000000e+00, double* @lxd, align 8
+ store double 0.000000e+00, double* @lyd, align 8
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lxy, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lxy, i32 0, i32 1)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 1)
+ store float 0.000000e+00, float* @lret_sf, align 4
+ store double 0.000000e+00, double* @lret_df, align 8
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ ret void
+}
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ call void @clear()
+ store float 1.500000e+00, float* @lx, align 4
+ %0 = load float* @lx, align 4
+ call void @v_sf(float %0)
+ %1 = load float* @x, align 4
+ %conv = fpext float %1 to double
+ %2 = load float* @lx, align 4
+ %conv1 = fpext float %2 to double
+ %3 = load float* @x, align 4
+ %4 = load float* @lx, align 4
+ %cmp = fcmp oeq float %3, %4
+ %conv2 = zext i1 %cmp to i32
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2)
+ call void @clear()
+ store double 0x41678C29C0000000, double* @lxd, align 8
+ %5 = load double* @lxd, align 8
+ call void @v_df(double %5)
+ %6 = load double* @xd, align 8
+ %7 = load double* @lxd, align 8
+ %8 = load double* @xd, align 8
+ %9 = load double* @lxd, align 8
+ %cmp3 = fcmp oeq double %8, %9
+ %conv4 = zext i1 %cmp3 to i32
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4)
+ call void @clear()
+ store float 9.000000e+00, float* @lx, align 4
+ store float 1.000000e+01, float* @ly, align 4
+ %10 = load float* @lx, align 4
+ %11 = load float* @ly, align 4
+ call void @v_sf_sf(float %10, float %11)
+ %12 = load float* @x, align 4
+ %conv6 = fpext float %12 to double
+ %13 = load float* @lx, align 4
+ %conv7 = fpext float %13 to double
+ %14 = load float* @y, align 4
+ %conv8 = fpext float %14 to double
+ %15 = load float* @ly, align 4
+ %conv9 = fpext float %15 to double
+ %16 = load float* @x, align 4
+ %17 = load float* @lx, align 4
+ %cmp10 = fcmp oeq float %16, %17
+ br i1 %cmp10, label %land.rhs, label %land.end
+
+land.rhs: ; preds = %entry
+ %18 = load float* @y, align 4
+ %19 = load float* @ly, align 4
+ %cmp12 = fcmp oeq float %18, %19
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %20 = phi i1 [ false, %entry ], [ %cmp12, %land.rhs ]
+ %land.ext = zext i1 %20 to i32
+ %call14 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv6, double %conv7, double %conv8, double %conv9, i32 %land.ext)
+ call void @clear()
+ store float 0x3FFE666660000000, float* @lx, align 4
+ store double 0x4007E613249FF279, double* @lyd, align 8
+ %21 = load float* @lx, align 4
+ %22 = load double* @lyd, align 8
+ call void @v_sf_df(float %21, double %22)
+ %23 = load float* @x, align 4
+ %conv15 = fpext float %23 to double
+ %24 = load float* @lx, align 4
+ %conv16 = fpext float %24 to double
+ %25 = load double* @yd, align 8
+ %26 = load double* @lyd, align 8
+ %27 = load float* @x, align 4
+ %28 = load float* @lx, align 4
+ %cmp17 = fcmp oeq float %27, %28
+ %conv18 = zext i1 %cmp17 to i32
+ %29 = load double* @yd, align 8
+ %30 = load double* @lyd, align 8
+ %cmp19 = fcmp oeq double %29, %30
+ %conv20 = zext i1 %cmp19 to i32
+ %and = and i32 %conv18, %conv20
+ %call21 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv15, double %conv16, double %25, double %26, i32 %and)
+ call void @clear()
+ store double 0x4194E54F94000000, double* @lxd, align 8
+ store float 7.600000e+01, float* @ly, align 4
+ %31 = load double* @lxd, align 8
+ %32 = load float* @ly, align 4
+ call void @v_df_sf(double %31, float %32)
+ %33 = load double* @xd, align 8
+ %34 = load double* @lxd, align 8
+ %35 = load float* @y, align 4
+ %conv22 = fpext float %35 to double
+ %36 = load float* @ly, align 4
+ %conv23 = fpext float %36 to double
+ %37 = load double* @xd, align 8
+ %38 = load double* @lxd, align 8
+ %cmp24 = fcmp oeq double %37, %38
+ %conv25 = zext i1 %cmp24 to i32
+ %39 = load float* @y, align 4
+ %40 = load float* @ly, align 4
+ %cmp26 = fcmp oeq float %39, %40
+ %conv27 = zext i1 %cmp26 to i32
+ %and28 = and i32 %conv25, %conv27
+ %call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %33, double %34, double %conv22, double %conv23, i32 %and28)
+ call void @clear()
+ store double 7.365198e+07, double* @lxd, align 8
+ store double 0x416536CD80000000, double* @lyd, align 8
+ %41 = load double* @lxd, align 8
+ %42 = load double* @lyd, align 8
+ call void @v_df_df(double %41, double %42)
+ %43 = load double* @xd, align 8
+ %44 = load double* @lxd, align 8
+ %45 = load double* @yd, align 8
+ %46 = load double* @lyd, align 8
+ %47 = load double* @xd, align 8
+ %48 = load double* @lxd, align 8
+ %cmp30 = fcmp oeq double %47, %48
+ %conv31 = zext i1 %cmp30 to i32
+ %49 = load double* @yd, align 8
+ %50 = load double* @lyd, align 8
+ %cmp32 = fcmp oeq double %49, %50
+ %conv33 = zext i1 %cmp32 to i32
+ %and34 = and i32 %conv31, %conv33
+ %call35 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %43, double %44, double %45, double %46, i32 %and34)
+ call void @clear()
+ store float 0x4016666660000000, float* @ret_sf, align 4
+ %call36 = call float @sf_v()
+ store float %call36, float* @lret_sf, align 4
+ %51 = load float* @ret_sf, align 4
+ %conv37 = fpext float %51 to double
+ %52 = load float* @lret_sf, align 4
+ %conv38 = fpext float %52 to double
+ %53 = load float* @ret_sf, align 4
+ %54 = load float* @lret_sf, align 4
+ %cmp39 = fcmp oeq float %53, %54
+ %conv40 = zext i1 %cmp39 to i32
+ %call41 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40)
+ call void @clear()
+ store float 4.587300e+06, float* @ret_sf, align 4
+ store float 3.420000e+02, float* @lx, align 4
+ %55 = load float* @lx, align 4
+ %call42 = call float @sf_sf(float %55)
+ store float %call42, float* @lret_sf, align 4
+ %56 = load float* @ret_sf, align 4
+ %conv43 = fpext float %56 to double
+ %57 = load float* @lret_sf, align 4
+ %conv44 = fpext float %57 to double
+ %58 = load float* @x, align 4
+ %conv45 = fpext float %58 to double
+ %59 = load float* @lx, align 4
+ %conv46 = fpext float %59 to double
+ %60 = load float* @ret_sf, align 4
+ %61 = load float* @lret_sf, align 4
+ %cmp47 = fcmp oeq float %60, %61
+ %conv48 = zext i1 %cmp47 to i32
+ %62 = load float* @x, align 4
+ %63 = load float* @lx, align 4
+ %cmp49 = fcmp oeq float %62, %63
+ %conv50 = zext i1 %cmp49 to i32
+ %and51 = and i32 %conv48, %conv50
+ %call52 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv43, double %conv44, double %conv45, double %conv46, i32 %and51)
+ call void @clear()
+ store float 4.445910e+06, float* @ret_sf, align 4
+ store double 0x419A7DB294000000, double* @lxd, align 8
+ %64 = load double* @lxd, align 8
+ %call53 = call float @sf_df(double %64)
+ store float %call53, float* @lret_sf, align 4
+ %65 = load float* @ret_sf, align 4
+ %conv54 = fpext float %65 to double
+ %66 = load float* @lret_sf, align 4
+ %conv55 = fpext float %66 to double
+ %67 = load double* @xd, align 8
+ %68 = load double* @lxd, align 8
+ %69 = load float* @ret_sf, align 4
+ %70 = load float* @lret_sf, align 4
+ %cmp56 = fcmp oeq float %69, %70
+ %conv57 = zext i1 %cmp56 to i32
+ %71 = load double* @xd, align 8
+ %72 = load double* @lxd, align 8
+ %cmp58 = fcmp oeq double %71, %72
+ %conv59 = zext i1 %cmp58 to i32
+ %and60 = and i32 %conv57, %conv59
+ %call61 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv54, double %conv55, double %67, double %68, i32 %and60)
+ call void @clear()
+ store float 0x3FFF4BC6A0000000, float* @ret_sf, align 4
+ store float 4.445500e+03, float* @lx, align 4
+ store float 0x4068ACCCC0000000, float* @ly, align 4
+ %73 = load float* @lx, align 4
+ %74 = load float* @ly, align 4
+ %call62 = call float @sf_sf_sf(float %73, float %74)
+ store float %call62, float* @lret_sf, align 4
+ %75 = load float* @ret_sf, align 4
+ %conv63 = fpext float %75 to double
+ %76 = load float* @lret_sf, align 4
+ %conv64 = fpext float %76 to double
+ %77 = load float* @x, align 4
+ %conv65 = fpext float %77 to double
+ %78 = load float* @lx, align 4
+ %conv66 = fpext float %78 to double
+ %79 = load float* @y, align 4
+ %conv67 = fpext float %79 to double
+ %80 = load float* @ly, align 4
+ %conv68 = fpext float %80 to double
+ %81 = load float* @ret_sf, align 4
+ %82 = load float* @lret_sf, align 4
+ %cmp69 = fcmp oeq float %81, %82
+ br i1 %cmp69, label %land.lhs.true, label %land.end76
+
+land.lhs.true: ; preds = %land.end
+ %83 = load float* @x, align 4
+ %84 = load float* @lx, align 4
+ %cmp71 = fcmp oeq float %83, %84
+ br i1 %cmp71, label %land.rhs73, label %land.end76
+
+land.rhs73: ; preds = %land.lhs.true
+ %85 = load float* @y, align 4
+ %86 = load float* @ly, align 4
+ %cmp74 = fcmp oeq float %85, %86
+ br label %land.end76
+
+land.end76: ; preds = %land.rhs73, %land.lhs.true, %land.end
+ %87 = phi i1 [ false, %land.lhs.true ], [ false, %land.end ], [ %cmp74, %land.rhs73 ]
+ %land.ext77 = zext i1 %87 to i32
+ %call78 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv63, double %conv64, double %conv65, double %conv66, double %conv67, double %conv68, i32 %land.ext77)
+ call void @clear()
+ store float 9.991300e+04, float* @ret_sf, align 4
+ store float 1.114500e+04, float* @lx, align 4
+ store double 9.994445e+07, double* @lyd, align 8
+ %88 = load float* @lx, align 4
+ %89 = load double* @lyd, align 8
+ %call79 = call float @sf_sf_df(float %88, double %89)
+ store float %call79, float* @lret_sf, align 4
+ %90 = load float* @ret_sf, align 4
+ %conv80 = fpext float %90 to double
+ %91 = load float* @lret_sf, align 4
+ %conv81 = fpext float %91 to double
+ %92 = load float* @x, align 4
+ %conv82 = fpext float %92 to double
+ %93 = load float* @lx, align 4
+ %conv83 = fpext float %93 to double
+ %94 = load double* @yd, align 8
+ %95 = load double* @lyd, align 8
+ %96 = load float* @ret_sf, align 4
+ %97 = load float* @lret_sf, align 4
+ %cmp84 = fcmp oeq float %96, %97
+ br i1 %cmp84, label %land.lhs.true86, label %land.end92
+
+land.lhs.true86: ; preds = %land.end76
+ %98 = load float* @x, align 4
+ %99 = load float* @lx, align 4
+ %cmp87 = fcmp oeq float %98, %99
+ br i1 %cmp87, label %land.rhs89, label %land.end92
+
+land.rhs89: ; preds = %land.lhs.true86
+ %100 = load double* @yd, align 8
+ %101 = load double* @lyd, align 8
+ %cmp90 = fcmp oeq double %100, %101
+ br label %land.end92
+
+land.end92: ; preds = %land.rhs89, %land.lhs.true86, %land.end76
+ %102 = phi i1 [ false, %land.lhs.true86 ], [ false, %land.end76 ], [ %cmp90, %land.rhs89 ]
+ %land.ext93 = zext i1 %102 to i32
+ %call94 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv80, double %conv81, double %conv82, double %conv83, double %94, double %95, i32 %land.ext93)
+ call void @clear()
+ store float 0x417CCC7A00000000, float* @ret_sf, align 4
+ store double 0x4172034530000000, double* @lxd, align 8
+ store float 4.456200e+04, float* @ly, align 4
+ %103 = load double* @lxd, align 8
+ %104 = load float* @ly, align 4
+ %call95 = call float @sf_df_sf(double %103, float %104)
+ store float %call95, float* @lret_sf, align 4
+ %105 = load float* @ret_sf, align 4
+ %conv96 = fpext float %105 to double
+ %106 = load float* @lret_sf, align 4
+ %conv97 = fpext float %106 to double
+ %107 = load double* @xd, align 8
+ %108 = load double* @lxd, align 8
+ %109 = load float* @y, align 4
+ %conv98 = fpext float %109 to double
+ %110 = load float* @ly, align 4
+ %conv99 = fpext float %110 to double
+ %111 = load float* @ret_sf, align 4
+ %112 = load float* @lret_sf, align 4
+ %cmp100 = fcmp oeq float %111, %112
+ br i1 %cmp100, label %land.lhs.true102, label %land.end108
+
+land.lhs.true102: ; preds = %land.end92
+ %113 = load double* @xd, align 8
+ %114 = load double* @lxd, align 8
+ %cmp103 = fcmp oeq double %113, %114
+ br i1 %cmp103, label %land.rhs105, label %land.end108
+
+land.rhs105: ; preds = %land.lhs.true102
+ %115 = load float* @y, align 4
+ %116 = load float* @ly, align 4
+ %cmp106 = fcmp oeq float %115, %116
+ br label %land.end108
+
+land.end108: ; preds = %land.rhs105, %land.lhs.true102, %land.end92
+ %117 = phi i1 [ false, %land.lhs.true102 ], [ false, %land.end92 ], [ %cmp106, %land.rhs105 ]
+ %land.ext109 = zext i1 %117 to i32
+ %call110 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv96, double %conv97, double %107, double %108, double %conv98, double %conv99, i32 %land.ext109)
+ call void @clear()
+ store float 3.987721e+06, float* @ret_sf, align 4
+ store double 0x3FF1F49F6DDDC2D8, double* @lxd, align 8
+ store double 0x409129F306A2B170, double* @lyd, align 8
+ %118 = load double* @lxd, align 8
+ %119 = load double* @lyd, align 8
+ %call111 = call float @sf_df_df(double %118, double %119)
+ store float %call111, float* @lret_sf, align 4
+ %120 = load float* @ret_sf, align 4
+ %conv112 = fpext float %120 to double
+ %121 = load float* @lret_sf, align 4
+ %conv113 = fpext float %121 to double
+ %122 = load double* @xd, align 8
+ %123 = load double* @lxd, align 8
+ %124 = load double* @yd, align 8
+ %125 = load double* @lyd, align 8
+ %126 = load float* @ret_sf, align 4
+ %127 = load float* @lret_sf, align 4
+ %cmp114 = fcmp oeq float %126, %127
+ br i1 %cmp114, label %land.lhs.true116, label %land.end122
+
+land.lhs.true116: ; preds = %land.end108
+ %128 = load double* @xd, align 8
+ %129 = load double* @lxd, align 8
+ %cmp117 = fcmp oeq double %128, %129
+ br i1 %cmp117, label %land.rhs119, label %land.end122
+
+land.rhs119: ; preds = %land.lhs.true116
+ %130 = load double* @yd, align 8
+ %131 = load double* @lyd, align 8
+ %cmp120 = fcmp oeq double %130, %131
+ br label %land.end122
+
+land.end122: ; preds = %land.rhs119, %land.lhs.true116, %land.end108
+ %132 = phi i1 [ false, %land.lhs.true116 ], [ false, %land.end108 ], [ %cmp120, %land.rhs119 ]
+ %land.ext123 = zext i1 %132 to i32
+ %call124 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv112, double %conv113, double %122, double %123, double %124, double %125, i32 %land.ext123)
+ call void @clear()
+ store double 1.561234e+01, double* @ret_df, align 8
+ %call125 = call double @df_v()
+ store double %call125, double* @lret_df, align 8
+ %133 = load double* @ret_df, align 8
+ %134 = load double* @lret_df, align 8
+ %135 = load double* @ret_df, align 8
+ %136 = load double* @lret_df, align 8
+ %cmp126 = fcmp oeq double %135, %136
+ %conv127 = zext i1 %cmp126 to i32
+ %call128 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127)
+ call void @clear()
+ store double 1.345873e+01, double* @ret_df, align 8
+ store float 3.434520e+05, float* @lx, align 4
+ %137 = load float* @lx, align 4
+ %call129 = call double @df_sf(float %137)
+ store double %call129, double* @lret_df, align 8
+ %138 = load double* @ret_df, align 8
+ %139 = load double* @lret_df, align 8
+ %140 = load float* @x, align 4
+ %conv130 = fpext float %140 to double
+ %141 = load float* @lx, align 4
+ %conv131 = fpext float %141 to double
+ %142 = load double* @ret_df, align 8
+ %143 = load double* @lret_df, align 8
+ %cmp132 = fcmp oeq double %142, %143
+ %conv133 = zext i1 %cmp132 to i32
+ %144 = load float* @x, align 4
+ %145 = load float* @lx, align 4
+ %cmp134 = fcmp oeq float %144, %145
+ %conv135 = zext i1 %cmp134 to i32
+ %and136 = and i32 %conv133, %conv135
+ %call137 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %138, double %139, double %conv130, double %conv131, i32 %and136)
+ call void @clear()
+ store double 0x4084F3AB7AA25D8D, double* @ret_df, align 8
+ store double 0x4114F671D2F1A9FC, double* @lxd, align 8
+ %146 = load double* @lxd, align 8
+ %call138 = call double @df_df(double %146)
+ store double %call138, double* @lret_df, align 8
+ %147 = load double* @ret_df, align 8
+ %148 = load double* @lret_df, align 8
+ %149 = load double* @xd, align 8
+ %150 = load double* @lxd, align 8
+ %151 = load double* @ret_df, align 8
+ %152 = load double* @lret_df, align 8
+ %cmp139 = fcmp oeq double %151, %152
+ %conv140 = zext i1 %cmp139 to i32
+ %153 = load double* @xd, align 8
+ %154 = load double* @lxd, align 8
+ %cmp141 = fcmp oeq double %153, %154
+ %conv142 = zext i1 %cmp141 to i32
+ %and143 = and i32 %conv140, %conv142
+ %call144 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %147, double %148, double %149, double %150, i32 %and143)
+ call void @clear()
+ store double 6.781956e+03, double* @ret_df, align 8
+ store float 4.445500e+03, float* @lx, align 4
+ store float 0x4068ACCCC0000000, float* @ly, align 4
+ %155 = load float* @lx, align 4
+ %156 = load float* @ly, align 4
+ %call145 = call double @df_sf_sf(float %155, float %156)
+ store double %call145, double* @lret_df, align 8
+ %157 = load double* @ret_df, align 8
+ %158 = load double* @lret_df, align 8
+ %159 = load float* @x, align 4
+ %conv146 = fpext float %159 to double
+ %160 = load float* @lx, align 4
+ %conv147 = fpext float %160 to double
+ %161 = load float* @y, align 4
+ %conv148 = fpext float %161 to double
+ %162 = load float* @ly, align 4
+ %conv149 = fpext float %162 to double
+ %163 = load double* @ret_df, align 8
+ %164 = load double* @lret_df, align 8
+ %cmp150 = fcmp oeq double %163, %164
+ br i1 %cmp150, label %land.lhs.true152, label %land.end158
+
+land.lhs.true152: ; preds = %land.end122
+ %165 = load float* @x, align 4
+ %166 = load float* @lx, align 4
+ %cmp153 = fcmp oeq float %165, %166
+ br i1 %cmp153, label %land.rhs155, label %land.end158
+
+land.rhs155: ; preds = %land.lhs.true152
+ %167 = load float* @y, align 4
+ %168 = load float* @ly, align 4
+ %cmp156 = fcmp oeq float %167, %168
+ br label %land.end158
+
+land.end158: ; preds = %land.rhs155, %land.lhs.true152, %land.end122
+ %169 = phi i1 [ false, %land.lhs.true152 ], [ false, %land.end122 ], [ %cmp156, %land.rhs155 ]
+ %land.ext159 = zext i1 %169 to i32
+ %call160 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %157, double %158, double %conv146, double %conv147, double %conv148, double %conv149, i32 %land.ext159)
+ call void @clear()
+ store double 1.889130e+05, double* @ret_df, align 8
+ store float 9.111450e+05, float* @lx, align 4
+ store double 0x4185320A58000000, double* @lyd, align 8
+ %170 = load float* @lx, align 4
+ %171 = load double* @lyd, align 8
+ %call161 = call double @df_sf_df(float %170, double %171)
+ store double %call161, double* @lret_df, align 8
+ %172 = load double* @ret_df, align 8
+ %173 = load double* @lret_df, align 8
+ %174 = load float* @x, align 4
+ %conv162 = fpext float %174 to double
+ %175 = load float* @lx, align 4
+ %conv163 = fpext float %175 to double
+ %176 = load double* @yd, align 8
+ %177 = load double* @lyd, align 8
+ %178 = load double* @ret_df, align 8
+ %179 = load double* @lret_df, align 8
+ %cmp164 = fcmp oeq double %178, %179
+ br i1 %cmp164, label %land.lhs.true166, label %land.end172
+
+land.lhs.true166: ; preds = %land.end158
+ %180 = load float* @x, align 4
+ %181 = load float* @lx, align 4
+ %cmp167 = fcmp oeq float %180, %181
+ br i1 %cmp167, label %land.rhs169, label %land.end172
+
+land.rhs169: ; preds = %land.lhs.true166
+ %182 = load double* @yd, align 8
+ %183 = load double* @lyd, align 8
+ %cmp170 = fcmp oeq double %182, %183
+ br label %land.end172
+
+land.end172: ; preds = %land.rhs169, %land.lhs.true166, %land.end158
+ %184 = phi i1 [ false, %land.lhs.true166 ], [ false, %land.end158 ], [ %cmp170, %land.rhs169 ]
+ %land.ext173 = zext i1 %184 to i32
+ %call174 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %172, double %173, double %conv162, double %conv163, double %176, double %177, i32 %land.ext173)
+ call void @clear()
+ store double 0x418B2DB900000000, double* @ret_df, align 8
+ store double 0x41B1EF2ED3000000, double* @lxd, align 8
+ store float 1.244562e+06, float* @ly, align 4
+ %185 = load double* @lxd, align 8
+ %186 = load float* @ly, align 4
+ %call175 = call double @df_df_sf(double %185, float %186)
+ store double %call175, double* @lret_df, align 8
+ %187 = load double* @ret_df, align 8
+ %188 = load double* @lret_df, align 8
+ %189 = load double* @xd, align 8
+ %190 = load double* @lxd, align 8
+ %191 = load float* @y, align 4
+ %conv176 = fpext float %191 to double
+ %192 = load float* @ly, align 4
+ %conv177 = fpext float %192 to double
+ %193 = load double* @ret_df, align 8
+ %194 = load double* @lret_df, align 8
+ %cmp178 = fcmp oeq double %193, %194
+ br i1 %cmp178, label %land.lhs.true180, label %land.end186
+
+land.lhs.true180: ; preds = %land.end172
+ %195 = load double* @xd, align 8
+ %196 = load double* @lxd, align 8
+ %cmp181 = fcmp oeq double %195, %196
+ br i1 %cmp181, label %land.rhs183, label %land.end186
+
+land.rhs183: ; preds = %land.lhs.true180
+ %197 = load float* @y, align 4
+ %198 = load float* @ly, align 4
+ %cmp184 = fcmp oeq float %197, %198
+ br label %land.end186
+
+land.end186: ; preds = %land.rhs183, %land.lhs.true180, %land.end172
+ %199 = phi i1 [ false, %land.lhs.true180 ], [ false, %land.end172 ], [ %cmp184, %land.rhs183 ]
+ %land.ext187 = zext i1 %199 to i32
+ %call188 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %187, double %188, double %189, double %190, double %conv176, double %conv177, i32 %land.ext187)
+ call void @clear()
+ store double 3.987721e+06, double* @ret_df, align 8
+ store double 5.223560e+00, double* @lxd, align 8
+ store double 0x40B7D37CC1A8AC5C, double* @lyd, align 8
+ %200 = load double* @lxd, align 8
+ %201 = load double* @lyd, align 8
+ %call189 = call double @df_df_df(double %200, double %201)
+ store double %call189, double* @lret_df, align 8
+ %202 = load double* @ret_df, align 8
+ %203 = load double* @lret_df, align 8
+ %204 = load double* @xd, align 8
+ %205 = load double* @lxd, align 8
+ %206 = load double* @yd, align 8
+ %207 = load double* @lyd, align 8
+ %208 = load double* @ret_df, align 8
+ %209 = load double* @lret_df, align 8
+ %cmp190 = fcmp oeq double %208, %209
+ br i1 %cmp190, label %land.lhs.true192, label %land.end198
+
+land.lhs.true192: ; preds = %land.end186
+ %210 = load double* @xd, align 8
+ %211 = load double* @lxd, align 8
+ %cmp193 = fcmp oeq double %210, %211
+ br i1 %cmp193, label %land.rhs195, label %land.end198
+
+land.rhs195: ; preds = %land.lhs.true192
+ %212 = load double* @yd, align 8
+ %213 = load double* @lyd, align 8
+ %cmp196 = fcmp oeq double %212, %213
+ br label %land.end198
+
+land.end198: ; preds = %land.rhs195, %land.lhs.true192, %land.end186
+ %214 = phi i1 [ false, %land.lhs.true192 ], [ false, %land.end186 ], [ %cmp196, %land.rhs195 ]
+ %land.ext199 = zext i1 %214 to i32
+ %call200 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %202, double %203, double %204, double %205, double %206, double %207, i32 %land.ext199)
+ call void @clear()
+ store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %call201 = call { float, float } @sc_v()
+ %215 = extractvalue { float, float } %call201, 0
+ %216 = extractvalue { float, float } %call201, 1
+ store float %215, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ store float %216, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %ret_sc.real = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %conv202 = fpext float %ret_sc.real to double
+ %conv203 = fpext float %ret_sc.imag to double
+ %ret_sc.real204 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag205 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %conv206 = fpext float %ret_sc.real204 to double
+ %conv207 = fpext float %ret_sc.imag205 to double
+ %lret_sc.real = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %conv208 = fpext float %lret_sc.real to double
+ %conv209 = fpext float %lret_sc.imag to double
+ %lret_sc.real210 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag211 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %conv212 = fpext float %lret_sc.real210 to double
+ %conv213 = fpext float %lret_sc.imag211 to double
+ %ret_sc.real214 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag215 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %lret_sc.real216 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag217 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %cmp.r = fcmp oeq float %ret_sc.real214, %lret_sc.real216
+ %cmp.i = fcmp oeq float %ret_sc.imag215, %lret_sc.imag217
+ %and.ri = and i1 %cmp.r, %cmp.i
+ %conv218 = zext i1 %and.ri to i32
+ %call219 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %conv202, double %conv207, double %conv208, double %conv213, i32 %conv218)
+ call void @clear()
+ store float 0x3FF7A99300000000, float* @lx, align 4
+ store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %217 = load float* @lx, align 4
+ %call220 = call { float, float } @sc_sf(float %217)
+ %218 = extractvalue { float, float } %call220, 0
+ %219 = extractvalue { float, float } %call220, 1
+ store float %218, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ store float %219, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %ret_sc.real221 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag222 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %conv223 = fpext float %ret_sc.real221 to double
+ %conv224 = fpext float %ret_sc.imag222 to double
+ %ret_sc.real225 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag226 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %conv227 = fpext float %ret_sc.real225 to double
+ %conv228 = fpext float %ret_sc.imag226 to double
+ %lret_sc.real229 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag230 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %conv231 = fpext float %lret_sc.real229 to double
+ %conv232 = fpext float %lret_sc.imag230 to double
+ %lret_sc.real233 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag234 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %conv235 = fpext float %lret_sc.real233 to double
+ %conv236 = fpext float %lret_sc.imag234 to double
+ %220 = load float* @x, align 4
+ %conv237 = fpext float %220 to double
+ %221 = load float* @lx, align 4
+ %conv238 = fpext float %221 to double
+ %ret_sc.real239 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag240 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %lret_sc.real241 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag242 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %cmp.r243 = fcmp oeq float %ret_sc.real239, %lret_sc.real241
+ %cmp.i244 = fcmp oeq float %ret_sc.imag240, %lret_sc.imag242
+ %and.ri245 = and i1 %cmp.r243, %cmp.i244
+ br i1 %and.ri245, label %land.rhs247, label %land.end250
+
+land.rhs247: ; preds = %land.end198
+ %222 = load float* @x, align 4
+ %223 = load float* @lx, align 4
+ %cmp248 = fcmp oeq float %222, %223
+ br label %land.end250
+
+land.end250: ; preds = %land.rhs247, %land.end198
+ %224 = phi i1 [ false, %land.end198 ], [ %cmp248, %land.rhs247 ]
+ %land.ext251 = zext i1 %224 to i32
+ %call252 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %conv223, double %conv228, double %conv231, double %conv236, double %conv237, double %conv238, i32 %land.ext251)
+ call void @clear()
+ store double 1.234500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ store double 7.677000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %call253 = call { double, double } @dc_v()
+ %225 = extractvalue { double, double } %call253, 0
+ %226 = extractvalue { double, double } %call253, 1
+ store double %225, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ store double %226, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %ret_dc.real = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %ret_dc.real254 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag255 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %lret_dc.real256 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag257 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %ret_dc.real258 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag259 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real260 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag261 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %cmp.r262 = fcmp oeq double %ret_dc.real258, %lret_dc.real260
+ %cmp.i263 = fcmp oeq double %ret_dc.imag259, %lret_dc.imag261
+ %and.ri264 = and i1 %cmp.r262, %cmp.i263
+ %conv265 = zext i1 %and.ri264 to i32
+ %call266 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %ret_dc.real, double %ret_dc.imag255, double %lret_dc.real, double %lret_dc.imag257, i32 %conv265)
+ call void @clear()
+ store double 0x40AAF6F532617C1C, double* @lxd, align 8
+ store double 4.444500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ store double 7.888000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %227 = load float* @lx, align 4
+ %call267 = call { double, double } @dc_sf(float %227)
+ %228 = extractvalue { double, double } %call267, 0
+ %229 = extractvalue { double, double } %call267, 1
+ store double %228, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ store double %229, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %ret_dc.real268 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag269 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %ret_dc.real270 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag271 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real272 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag273 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %lret_dc.real274 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag275 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %230 = load float* @x, align 4
+ %conv276 = fpext float %230 to double
+ %231 = load float* @lx, align 4
+ %conv277 = fpext float %231 to double
+ %ret_dc.real278 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag279 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real280 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag281 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %cmp.r282 = fcmp oeq double %ret_dc.real278, %lret_dc.real280
+ %cmp.i283 = fcmp oeq double %ret_dc.imag279, %lret_dc.imag281
+ %and.ri284 = and i1 %cmp.r282, %cmp.i283
+ br i1 %and.ri284, label %land.rhs286, label %land.end289
+
+land.rhs286: ; preds = %land.end250
+ %232 = load float* @x, align 4
+ %233 = load float* @lx, align 4
+ %cmp287 = fcmp oeq float %232, %233
+ br label %land.end289
+
+land.end289: ; preds = %land.rhs286, %land.end250
+ %234 = phi i1 [ false, %land.end250 ], [ %cmp287, %land.rhs286 ]
+ %land.ext290 = zext i1 %234 to i32
+ %call291 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290)
+ %235 = load i32* %retval
+ ret i32 %235
+}
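+; The body of main is mechanical: each section calls @clear(), stores fresh
+; expected values into the ret_*/l* globals, invokes one helper, and prints
+; the results together with an equality flag, driving each hard-float
+; signature through its mips16 call stub below.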
+
+declare void @v_sf(float) #1
+; stel: .section .mips16.call.fp.v_sf,"ax",@progbits
+; stel: .ent __call_stub_v_sf
+; stel: mtc1 $4,$f12
+; stel: lui $25,%hi(v_sf)
+; stel: addiu $25,$25,%lo(v_sf)
+; stel: jr $25
+; stel: .end __call_stub_v_sf
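+;
+; The stubs checked in this file follow one pattern, as the stel lines
+; suggest: FP arguments arrive in the integer registers $4-$7 and are moved
+; into the FPU argument registers $f12-$f15 with mtc1. Stubs for
+; void-returning helpers then tail-jump through $25; stubs for FP or complex
+; returns save $31 in $18, jal the real function, and copy the result back
+; from $f0-$f3 into $2-$5 with mfc1 before returning via $18.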
+
+declare i32 @printf(i8*, ...) #1
+
+declare void @v_df(double) #1
+; stel: .section .mips16.call.fp.v_df,"ax",@progbits
+; stel: .ent __call_stub_v_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: lui $25,%hi(v_df)
+; stel: addiu $25,$25,%lo(v_df)
+; stel: jr $25
+; stel: .end __call_stub_v_df
+
+declare void @v_sf_sf(float, float) #1
+; stel: .section .mips16.call.fp.v_sf_sf,"ax",@progbits
+; stel: .ent __call_stub_v_sf_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f14
+; stel: lui $25,%hi(v_sf_sf)
+; stel: addiu $25,$25,%lo(v_sf_sf)
+; stel: jr $25
+; stel: .end __call_stub_v_sf_sf
+
+declare void @v_sf_df(float, double) #1
+; stel: .section .mips16.call.fp.v_sf_df,"ax",@progbits
+; stel: .ent __call_stub_v_sf_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: lui $25,%hi(v_sf_df)
+; stel: addiu $25,$25,%lo(v_sf_df)
+; stel: jr $25
+; stel: .end __call_stub_v_sf_df
+
+declare void @v_df_sf(double, float) #1
+; stel: .section .mips16.call.fp.v_df_sf,"ax",@progbits
+; stel: .ent __call_stub_v_df_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: lui $25,%hi(v_df_sf)
+; stel: addiu $25,$25,%lo(v_df_sf)
+; stel: jr $25
+; stel: .end __call_stub_v_df_sf
+
+declare void @v_df_df(double, double) #1
+; stel: .section .mips16.call.fp.v_df_df,"ax",@progbits
+; stel: .ent __call_stub_v_df_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: lui $25,%hi(v_df_df)
+; stel: addiu $25,$25,%lo(v_df_df)
+; stel: jr $25
+; stel: .end __call_stub_v_df_df
+
+declare float @sf_v() #1
+; stel: .section .mips16.call.fp.sf_v,"ax",@progbits
+; stel: .ent __call_stub_sf_v
+; stel: move $18, $31
+; stel: jal sf_v
+; stel: mfc1 $2,$f0
+; stel: jr $18
+; stel: .end __call_stub_sf_v
+
+declare float @sf_sf(float) #1
+; stel: .section .mips16.call.fp.sf_sf,"ax",@progbits
+; stel: .ent __call_stub_sf_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal sf_sf
+; stel: mfc1 $2,$f0
+; stel: jr $18
+; stel: .end __call_stub_sf_sf
+
+declare float @sf_df(double) #1
+; stel: .section .mips16.call.fp.sf_df,"ax",@progbits
+; stel: .ent __call_stub_sf_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: move $18, $31
+; stel: jal sf_df
+; stel: mfc1 $2,$f0
+; stel: jr $18
+; stel: .end __call_stub_sf_df
+
+declare float @sf_sf_sf(float, float) #1
+; stel: .section .mips16.call.fp.sf_sf_sf,"ax",@progbits
+; stel: .ent __call_stub_sf_sf_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f14
+; stel: move $18, $31
+; stel: jal sf_sf_sf
+; stel: mfc1 $2,$f0
+; stel: jr $18
+; stel: .end __call_stub_sf_sf_sf
+
+declare float @sf_sf_df(float, double) #1
+; stel: .section .mips16.call.fp.sf_sf_df,"ax",@progbits
+; stel: .ent __call_stub_sf_sf_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal sf_sf_df
+; stel: mfc1 $2,$f0
+; stel: jr $18
+; stel: .end __call_stub_sf_sf_df
+
+declare float @sf_df_sf(double, float) #1
+; stel: .section .mips16.call.fp.sf_df_sf,"ax",@progbits
+; stel: .ent __call_stub_sf_df_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: move $18, $31
+; stel: jal sf_df_sf
+; stel: mfc1 $2,$f0
+; stel: jr $18
+; stel: .end __call_stub_sf_df_sf
+
+declare float @sf_df_df(double, double) #1
+; stel: .section .mips16.call.fp.sf_df_df,"ax",@progbits
+; stel: .ent __call_stub_sf_df_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal sf_df_df
+; stel: mfc1 $2,$f0
+; stel: jr $18
+; stel: .end __call_stub_sf_df_df
+
+declare double @df_v() #1
+; stel: .section .mips16.call.fp.df_v,"ax",@progbits
+; stel: .ent __call_stub_df_v
+; stel: move $18, $31
+; stel: jal df_v
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_df_v
+
+declare double @df_sf(float) #1
+; stel: .section .mips16.call.fp.df_sf,"ax",@progbits
+; stel: .ent __call_stub_df_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal df_sf
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_df_sf
+
+declare double @df_df(double) #1
+; stel: .section .mips16.call.fp.df_df,"ax",@progbits
+; stel: .ent __call_stub_df_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: move $18, $31
+; stel: jal df_df
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_df_df
+
+declare double @df_sf_sf(float, float) #1
+; stel: .section .mips16.call.fp.df_sf_sf,"ax",@progbits
+; stel: .ent __call_stub_df_sf_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f14
+; stel: move $18, $31
+; stel: jal df_sf_sf
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_df_sf_sf
+
+declare double @df_sf_df(float, double) #1
+; stel: .section .mips16.call.fp.df_sf_df,"ax",@progbits
+; stel: .ent __call_stub_df_sf_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal df_sf_df
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_df_sf_df
+
+declare double @df_df_sf(double, float) #1
+; stel: .section .mips16.call.fp.df_df_sf,"ax",@progbits
+; stel: .ent __call_stub_df_df_sf
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: move $18, $31
+; stel: jal df_df_sf
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_df_df_sf
+
+declare double @df_df_df(double, double) #1
+; stel: .section .mips16.call.fp.df_df_df,"ax",@progbits
+; stel: .ent __call_stub_df_df_df
+; stel: mtc1 $4,$f12
+; stel: mtc1 $5,$f13
+; stel: mtc1 $6,$f14
+; stel: mtc1 $7,$f15
+; stel: move $18, $31
+; stel: jal df_df_df
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_df_df_df
+
+declare { float, float } @sc_v() #1
+; stel: .section .mips16.call.fp.sc_v,"ax",@progbits
+; stel: .ent __call_stub_sc_v
+; stel: move $18, $31
+; stel: jal sc_v
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f2
+; stel: jr $18
+; stel: .end __call_stub_sc_v
+
+declare { float, float } @sc_sf(float) #1
+; stel: .section .mips16.call.fp.sc_sf,"ax",@progbits
+; stel: .ent __call_stub_sc_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal sc_sf
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f2
+; stel: jr $18
+; stel: .end __call_stub_sc_sf
+
+declare { double, double } @dc_v() #1
+; stel: .section .mips16.call.fp.dc_v,"ax",@progbits
+; stel: .ent __call_stub_dc_v
+; stel: move $18, $31
+; stel: jal dc_v
+; stel: mfc1 $4,$f2
+; stel: mfc1 $5,$f3
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_dc_v
+
+declare { double, double } @dc_sf(float) #1
+; stel: .section .mips16.call.fp.dc_sf,"ax",@progbits
+; stel: .ent __call_stub_dc_sf
+; stel: mtc1 $4,$f12
+; stel: move $18, $31
+; stel: jal dc_sf
+; stel: mfc1 $4,$f2
+; stel: mfc1 $5,$f3
+; stel: mfc1 $2,$f0
+; stel: mfc1 $3,$f1
+; stel: jr $18
+; stel: .end __call_stub_dc_sf
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
new file mode 100644
index 0000000000..eae9a2216a
--- /dev/null
+++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 < %s | \
+; RUN: FileCheck %s -check-prefix=LE-PIC
+; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \
+; RUN: FileCheck %s -check-prefix=LE-STATIC
+; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \
+; RUN: FileCheck %s -check-prefix=BE-PIC
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=CHECK-LDC1-SDC1
+
+@g0 = common global double 0.000000e+00, align 8
+
+; LE-PIC: test_ldc1:
+; LE-PIC: lwc1 $f0, 0(${{[0-9]+}})
+; LE-PIC: lwc1 $f1, 4(${{[0-9]+}})
+; LE-STATIC: test_ldc1:
+; LE-STATIC: lwc1 $f0, %lo(g0)(${{[0-9]+}})
+; LE-STATIC: lwc1 $f1, %lo(g0+4)(${{[0-9]+}})
+; BE-PIC: test_ldc1:
+; BE-PIC: lwc1 $f1, 0(${{[0-9]+}})
+; BE-PIC: lwc1 $f0, 4(${{[0-9]+}})
+; CHECK-LDC1-SDC1: test_ldc1:
+; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}}
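+;
+; With -mno-ldc1-sdc1 each 64-bit ldc1/sdc1 is split into two 32-bit
+; lwc1/swc1 accesses to the halves of the register pair; which half maps to
+; the low word is endian-dependent, hence the swapped $f0/$f1 offsets in the
+; BE-PIC checks. The plain mipsel run verifies that ldc1/sdc1 are still
+; emitted by default.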
+
+define double @test_ldc1() {
+entry:
+ %0 = load double* @g0, align 8
+ ret double %0
+}
+
+; LE-PIC: test_sdc1:
+; LE-PIC: swc1 $f12, 0(${{[0-9]+}})
+; LE-PIC: swc1 $f13, 4(${{[0-9]+}})
+; LE-STATIC: test_sdc1:
+; LE-STATIC: swc1 $f12, %lo(g0)(${{[0-9]+}})
+; LE-STATIC: swc1 $f13, %lo(g0+4)(${{[0-9]+}})
+; BE-PIC: test_sdc1:
+; BE-PIC: swc1 $f13, 0(${{[0-9]+}})
+; BE-PIC: swc1 $f12, 4(${{[0-9]+}})
+; CHECK-LDC1-SDC1: test_sdc1:
+; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}}
+
+define void @test_sdc1(double %a) {
+entry:
+ store double %a, double* @g0, align 8
+ ret void
+}
diff --git a/test/CodeGen/Mips/stackcoloring.ll b/test/CodeGen/Mips/stackcoloring.ll
new file mode 100644
index 0000000000..76cc08679d
--- /dev/null
+++ b/test/CodeGen/Mips/stackcoloring.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
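+; Presumably reduced from a loop that passes a 64-byte local array to foo2 on
+; every iteration; the llvm.lifetime.start/end markers bound the array's live
+; range for stack coloring, and the checks pin the loop's block structure.
+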
+@g1 = external global i32*
+
+; CHECK: foo1:
+; CHECK: lw ${{[0-9]+}}, %got(g1)
+; CHECK: # %for.body
+; CHECK: # %for.end
+
+define i32 @foo1() {
+entry:
+ %b = alloca [16 x i32], align 4
+ %0 = bitcast [16 x i32]* %b to i8*
+ call void @llvm.lifetime.start(i64 64, i8* %0)
+ %arraydecay = getelementptr inbounds [16 x i32]* %b, i32 0, i32 0
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %v.04 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %1 = load i32** @g1, align 4
+ %arrayidx = getelementptr inbounds i32* %1, i32 %i.05
+ %2 = load i32* %arrayidx, align 4
+ %call = call i32 @foo2(i32 %2, i32* %arraydecay)
+ %add = add nsw i32 %call, %v.04
+ %inc = add nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ call void @llvm.lifetime.end(i64 64, i8* %0)
+ ret i32 %add
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare i32 @foo2(i32, i32*)
+
+declare void @llvm.lifetime.end(i64, i8* nocapture)
diff --git a/test/CodeGen/PowerPC/addrfuncstr.ll b/test/CodeGen/PowerPC/addrfuncstr.ll
new file mode 100644
index 0000000000..60c02d498f
--- /dev/null
+++ b/test/CodeGen/PowerPC/addrfuncstr.ll
@@ -0,0 +1,27 @@
+; RUN: llc -O0 < %s | FileCheck %s
+
+; Verify that a constant whose initializer may require a dynamic relocation
+; is placed in .data.rel.ro rather than in .rodata.
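+;
+; A rough C source for this IR might be (illustrative assumption only):
+;   struct x { size_t (*fp)(void *, size_t, size_t, FILE *); };
+;   static const struct x y = { fread };
+; The address of fread is only fixed at dynamic-link time, so the "constant"
+; needs a load-time relocation and cannot go in a section mapped read-only
+; from the start.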
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.x = type { i64 (i8*, i64, i64, %struct._IO_FILE*)* }
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@_ZL1y = internal constant %struct.x { i64 (i8*, i64, i64, %struct._IO_FILE*)* @fread }, align 8
+
+; Function Attrs: nounwind
+define %struct.x* @_Z3foov() #0 {
+entry:
+ ret %struct.x* @_ZL1y
+}
+
+declare i64 @fread(i8*, i64, i64, %struct._IO_FILE*) #1
+
+; CHECK: .section .data.rel.ro
+; CHECK: .quad fread
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll
index d698ab031d..f1cbc5afa8 100644
--- a/test/CodeGen/PowerPC/crsave.ll
+++ b/test/CodeGen/PowerPC/crsave.ll
@@ -13,9 +13,11 @@ entry:
ret i32 %1
}
+; PPC32: stw 31, -4(1)
+; PPC32: stwu 1, -32(1)
; PPC32: mfcr 12
-; PPC32-NEXT: stw 12, {{[0-9]+}}(31)
-; PPC32: lwz 12, {{[0-9]+}}(31)
+; PPC32-NEXT: stw 12, 24(31)
+; PPC32: lwz 12, 24(31)
; PPC32-NEXT: mtcrf 32, 12
; PPC64: mfcr 12
@@ -35,9 +37,11 @@ entry:
ret i32 %1
}
+; PPC32: stw 31, -4(1)
+; PPC32: stwu 1, -32(1)
; PPC32: mfcr 12
-; PPC32-NEXT: stw 12, {{[0-9]+}}(31)
-; PPC32: lwz 12, {{[0-9]+}}(31)
+; PPC32-NEXT: stw 12, 24(31)
+; PPC32: lwz 12, 24(31)
; PPC32-NEXT: mtcrf 32, 12
; PPC32-NEXT: mtcrf 16, 12
; PPC32-NEXT: mtcrf 8, 12
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
new file mode 100644
index 0000000000..3e854c840f
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_MAX_I32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+ %0 = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
+ %1 = bitcast i32 %0 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+ ret void
+}
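+; The bitcast and the llvm.SI.export call merely keep the intrinsic result
+; live (the export is the shader's only side effect); the test just checks
+; that V_MAX_I32 is selected. The imin/umax/umin and uitofp tests below use
+; the same skeleton.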
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.imax(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
new file mode 100644
index 0000000000..e227bf8d55
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_MIN_I32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+ %0 = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
+ %1 = bitcast i32 %0 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+ ret void
+}
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.imin(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
index ff22a69196..cdc03f8a41 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -1,16 +1,16 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
-;CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: @amdgpu_trunc
+; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI-CHECK: @amdgpu_trunc
+; SI-CHECK: V_TRUNC_F32
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
- %r1 = call float @llvm.AMDGPU.trunc( float %r0)
- call void @llvm.AMDGPU.store.output(float %r1, i32 0)
- ret void
+define void @amdgpu_trunc(float addrspace(1)* %out, float %x) {
+entry:
+ %0 = call float @llvm.AMDGPU.trunc(float %x)
+ store float %0, float addrspace(1)* %out
+ ret void
}
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
declare float @llvm.AMDGPU.trunc(float ) readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
new file mode 100644
index 0000000000..7699c04c36
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_MAX_U32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+ %0 = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
+ %1 = bitcast i32 %0 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+ ret void
+}
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.umax(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
new file mode 100644
index 0000000000..a911ad9bb3
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_MIN_U32_e32
+
+define void @main(i32 %p0, i32 %p1) #0 {
+main_body:
+ %0 = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
+ %1 = bitcast i32 %0 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
+ ret void
+}
+
+; Function Attrs: readnone
+declare i32 @llvm.AMDGPU.umin(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/uitofp.ll b/test/CodeGen/R600/uitofp.ll
new file mode 100644
index 0000000000..6cf9e6a225
--- /dev/null
+++ b/test/CodeGen/R600/uitofp.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_CVT_F32_U32_e32
+
+define void @main(i32 %p) #0 {
+main_body:
+ %0 = uitofp i32 %p to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %0, float %0, float %0, float %0)
+ ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll
index 6e66a262a4..cf1a039af5 100644
--- a/test/CodeGen/SPARC/64cond.ll
+++ b/test/CodeGen/SPARC/64cond.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
-; Testing 64-bit conditionals.
+; RUN: llc < %s -mtriple=sparc64-pc-openbsd | FileCheck %s
+; Testing 64-bit conditionals. The sparc64 triple is an alias for sparcv9.
; CHECK: cmpri
; CHECK: subcc %i1, 1
diff --git a/test/CodeGen/SystemZ/int-sub-07.ll b/test/CodeGen/SystemZ/int-sub-07.ll
new file mode 100644
index 0000000000..9bf5ed9055
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-07.ll
@@ -0,0 +1,131 @@
+; Test 32-bit subtraction in which the second operand is a sign-extended
+; i16 memory value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
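+; SH is an RX-format instruction with a 12-bit unsigned displacement
+; (0..4095); SHY is its RXY-format long-displacement form with a 20-bit
+; signed displacement (-524288..524287). Offsets outside the SHY range are
+; reached by first adjusting the base with AGFI, as f5 and f8 show.
+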
+; Check the low end of the SH range.
+define i32 @f1(i32 %lhs, i16 *%src) {
+; CHECK: f1:
+; CHECK: sh %r2, 0(%r3)
+; CHECK: br %r14
+ %half = load i16 *%src
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check the high end of the aligned SH range.
+define i32 @f2(i32 %lhs, i16 *%src) {
+; CHECK: f2:
+; CHECK: sh %r2, 4094(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%src, i64 2047
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check the next halfword up, which should use SHY instead of SH.
+define i32 @f3(i32 %lhs, i16 *%src) {
+; CHECK: f3:
+; CHECK: shy %r2, 4096(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%src, i64 2048
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check the high end of the aligned SHY range.
+define i32 @f4(i32 %lhs, i16 *%src) {
+; CHECK: f4:
+; CHECK: shy %r2, 524286(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%src, i64 262143
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f5(i32 %lhs, i16 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r3, 524288
+; CHECK: sh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%src, i64 262144
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check the high end of the negative aligned SHY range.
+define i32 @f6(i32 %lhs, i16 *%src) {
+; CHECK: f6:
+; CHECK: shy %r2, -2(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%src, i64 -1
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check the low end of the SHY range.
+define i32 @f7(i32 %lhs, i16 *%src) {
+; CHECK: f7:
+; CHECK: shy %r2, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%src, i64 -262144
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i32 %lhs, i16 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r3, -524290
+; CHECK: sh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%src, i64 -262145
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check that SH allows an index.
+define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: sh %r2, 4094({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+ %sub1 = add i64 %src, %index
+ %sub2 = add i64 %sub1, 4094
+ %ptr = inttoptr i64 %sub2 to i16 *
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
+
+; Check that SHY allows an index.
+define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: shy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+ %sub1 = add i64 %src, %index
+ %sub2 = add i64 %sub1, 4096
+ %ptr = inttoptr i64 %sub2 to i16 *
+ %half = load i16 *%ptr
+ %rhs = sext i16 %half to i32
+ %res = sub i32 %lhs, %rhs
+ ret i32 %res
+}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
index 61c477aa91..1b4d4625dd 100644
--- a/test/CodeGen/Thumb2/large-call.ll
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mcpu=cortex-a8 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios0.0.0"
diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll
new file mode 100644
index 0000000000..7869a80b2a
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-psub.ll
@@ -0,0 +1,213 @@
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck %s
+
+; MMX packed sub opcodes were wrongly marked as commutative.
+; This test checks that the operands of packed sub instructions are
+; never interchanged by the "Two-Address instruction pass".
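+;
+; psub is two-address: "psubb %mm1, %mm0" computes mm0 = mm0 - mm1, and
+; subtraction is not commutative, so swapping the operands to save a copy
+; would change the result. The checks therefore pin which value (the first
+; call's spilled result vs. the second call's result from %rax) feeds each
+; psub operand.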
+
+declare { i64, double } @getFirstParam()
+declare { i64, double } @getSecondParam()
+
+define i64 @test_psubb() {
+entry:
+ %call = tail call { i64, double } @getFirstParam()
+ %0 = extractvalue { i64, double } %call, 0
+ %call2 = tail call { i64, double } @getSecondParam()
+ %1 = extractvalue { i64, double } %call2, 0
+ %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+ %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
+ %3 = bitcast <8 x i8> %2 to x86_mmx
+ %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
+ %5 = bitcast <8 x i8> %4 to x86_mmx
+ %6 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %3, x86_mmx %5) nounwind
+ %7 = bitcast x86_mmx %6 to <8 x i8>
+ %8 = bitcast <8 x i8> %7 to <1 x i64>
+ %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+ ret i64 %retval.0.extract.i15
+}
+
+; CHECK: test_psubb:
+; CHECK: callq getFirstParam
+; CHECK: callq getSecondParam
+; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
+; CHECK: psubb [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubw() {
+entry:
+ %call = tail call { i64, double } @getFirstParam()
+ %0 = extractvalue { i64, double } %call, 0
+ %call2 = tail call { i64, double } @getSecondParam()
+ %1 = extractvalue { i64, double } %call2, 0
+ %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+ %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to x86_mmx
+ %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
+ %5 = bitcast <4 x i16> %4 to x86_mmx
+ %6 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %3, x86_mmx %5) nounwind
+ %7 = bitcast x86_mmx %6 to <4 x i16>
+ %8 = bitcast <4 x i16> %7 to <1 x i64>
+ %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+ ret i64 %retval.0.extract.i15
+}
+
+; CHECK: test_psubw:
+; CHECK: callq getFirstParam
+; CHECK: callq getSecondParam
+; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
+; CHECK: psubw [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+
+define i64 @test_psubd() {
+entry:
+ %call = tail call { i64, double } @getFirstParam()
+ %0 = extractvalue { i64, double } %call, 0
+ %call2 = tail call { i64, double } @getSecondParam()
+ %1 = extractvalue { i64, double } %call2, 0
+ %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+ %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ %2 = bitcast <1 x i64> %__m1.0.insert.i to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to x86_mmx
+ %4 = bitcast <1 x i64> %__m2.0.insert.i to <2 x i32>
+ %5 = bitcast <2 x i32> %4 to x86_mmx
+ %6 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %3, x86_mmx %5) nounwind
+ %7 = bitcast x86_mmx %6 to <2 x i32>
+ %8 = bitcast <2 x i32> %7 to <1 x i64>
+ %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+ ret i64 %retval.0.extract.i15
+}
+
+; CHECK: test_psubd:
+; CHECK: callq getFirstParam
+; CHECK: callq getSecondParam
+; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
+; CHECK: psubd [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubsb() {
+entry:
+ %call = tail call { i64, double } @getFirstParam()
+ %0 = extractvalue { i64, double } %call, 0
+ %call2 = tail call { i64, double } @getSecondParam()
+ %1 = extractvalue { i64, double } %call2, 0
+ %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+ %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
+ %3 = bitcast <8 x i8> %2 to x86_mmx
+ %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
+ %5 = bitcast <8 x i8> %4 to x86_mmx
+ %6 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %3, x86_mmx %5) nounwind
+ %7 = bitcast x86_mmx %6 to <8 x i8>
+ %8 = bitcast <8 x i8> %7 to <1 x i64>
+ %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+ ret i64 %retval.0.extract.i15
+}
+
+; CHECK: test_psubsb:
+; CHECK: callq getFirstParam
+; CHECK: callq getSecondParam
+; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
+; CHECK: psubsb [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubswv() {
+entry:
+ %call = tail call { i64, double } @getFirstParam()
+ %0 = extractvalue { i64, double } %call, 0
+ %call2 = tail call { i64, double } @getSecondParam()
+ %1 = extractvalue { i64, double } %call2, 0
+ %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+ %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to x86_mmx
+ %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
+ %5 = bitcast <4 x i16> %4 to x86_mmx
+ %6 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %3, x86_mmx %5) nounwind
+ %7 = bitcast x86_mmx %6 to <4 x i16>
+ %8 = bitcast <4 x i16> %7 to <1 x i64>
+ %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+ ret i64 %retval.0.extract.i15
+}
+
+; CHECK: test_psubswv:
+; CHECK: callq getFirstParam
+; CHECK: callq getSecondParam
+; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
+; CHECK: psubsw [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubusbv() {
+entry:
+ %call = tail call { i64, double } @getFirstParam()
+ %0 = extractvalue { i64, double } %call, 0
+ %call2 = tail call { i64, double } @getSecondParam()
+ %1 = extractvalue { i64, double } %call2, 0
+ %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+ %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
+ %3 = bitcast <8 x i8> %2 to x86_mmx
+ %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
+ %5 = bitcast <8 x i8> %4 to x86_mmx
+ %6 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %3, x86_mmx %5) nounwind
+ %7 = bitcast x86_mmx %6 to <8 x i8>
+ %8 = bitcast <8 x i8> %7 to <1 x i64>
+ %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+ ret i64 %retval.0.extract.i15
+}
+
+; CHECK: test_psubusbv:
+; CHECK: callq getFirstParam
+; CHECK: callq getSecondParam
+; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
+; CHECK: psubusb [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+define i64 @test_psubuswv() {
+entry:
+ %call = tail call { i64, double } @getFirstParam()
+ %0 = extractvalue { i64, double } %call, 0
+ %call2 = tail call { i64, double } @getSecondParam()
+ %1 = extractvalue { i64, double } %call2, 0
+ %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
+ %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to x86_mmx
+ %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
+ %5 = bitcast <4 x i16> %4 to x86_mmx
+ %6 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %3, x86_mmx %5) nounwind
+ %7 = bitcast x86_mmx %6 to <4 x i16>
+ %8 = bitcast <4 x i16> %7 to <1 x i64>
+ %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
+ ret i64 %retval.0.extract.i15
+}
+
+; CHECK: test_psubuswv:
+; CHECK: callq getFirstParam
+; CHECK: callq getSecondParam
+; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
+; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
+; CHECK: psubusw [[PARAM2]], [[PARAM1]]
+; CHECK: ret
+
+
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone