author     Richard Sandiford <rsandifo@linux.vnet.ibm.com>    2013-05-30 09:45:42 +0000
committer  Richard Sandiford <rsandifo@linux.vnet.ibm.com>    2013-05-30 09:45:42 +0000
commit     14a926f13b768ee3771bb944bbbb29529a40dbe1 (patch)
tree       84c2dbb8c1cdb7bd841e955b875421c16a28e49b /test/CodeGen
parent     ccb7bd9d84602c1fb5514dcee6de3420f175176a (diff)
[SystemZ] Enable unaligned accesses
The code to distinguish between unaligned and aligned addresses was already there, so this is mostly just a switch-on-and-test process.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182920 91177308-0d34-0410-b5e6-96231b3b80d8
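(Not part of the patch below: as a hedged illustration of what the "switch-on" half of a change like this typically involves, assuming the TargetLowering hook of that era, allowsUnalignedMemoryAccesses, and a SystemZTargetLowering override whose exact signature is a guess here, the backend would simply report misaligned loads and stores as legal so that SelectionDAG stops expanding them byte by byte.)

// Hypothetical sketch, not the committed code: the hook name follows the
// LLVM 3.3-era TargetLowering interface; the presence of the "bool *Fast"
// out-parameter is an assumption.
bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                          bool *Fast) const {
  // SystemZ load/store instructions do not require natural alignment, so
  // report unaligned accesses as legal (and fast); SelectionDAG then emits
  // ordinary loads/stores, which is what the tests below check for.
  if (Fast)
    *Fast = true;
  return true;
}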
Diffstat (limited to 'test/CodeGen')
-rw-r--r--  test/CodeGen/SystemZ/alloca-02.ll     10
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-36.ll    21
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-37.ll    22
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-38.ll    39
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-39.ll    21
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-40.ll    22
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-41.ll    21
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-42.ll    21
-rw-r--r--  test/CodeGen/SystemZ/int-cmp-43.ll    20
-rw-r--r--  test/CodeGen/SystemZ/int-move-08.ll   50
-rw-r--r--  test/CodeGen/SystemZ/int-move-09.ll   85
-rw-r--r--  test/CodeGen/SystemZ/unaligned-01.ll  59
12 files changed, 386 insertions, 5 deletions
diff --git a/test/CodeGen/SystemZ/alloca-02.ll b/test/CodeGen/SystemZ/alloca-02.ll
index fbb095f4d1..41c987a3f8 100644
--- a/test/CodeGen/SystemZ/alloca-02.ll
+++ b/test/CodeGen/SystemZ/alloca-02.ll
@@ -34,15 +34,15 @@ define i64 @f1(i64 %length, i64 %index) {
; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
; CHECK-E: mviy 4096([[TMP]]), 4
%a = alloca i8, i64 %length
- store i8 0, i8 *%a
+ store volatile i8 0, i8 *%a
%b = getelementptr i8 *%a, i64 4095
- store i8 1, i8 *%b
+ store volatile i8 1, i8 *%b
%c = getelementptr i8 *%a, i64 %index
- store i8 2, i8 *%c
+ store volatile i8 2, i8 *%c
%d = getelementptr i8 *%c, i64 4095
- store i8 3, i8 *%d
+ store volatile i8 3, i8 *%d
%e = getelementptr i8 *%d, i64 1
- store i8 4, i8 *%e
+ store volatile i8 4, i8 *%e
%count = call i64 @bar(i8 *%a)
%res = add i64 %count, 1
ret i64 %res
diff --git a/test/CodeGen/SystemZ/int-cmp-36.ll b/test/CodeGen/SystemZ/int-cmp-36.ll
index 9c8cd81b7b..df0e337cc8 100644
--- a/test/CodeGen/SystemZ/int-cmp-36.ll
+++ b/test/CodeGen/SystemZ/int-cmp-36.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1
+@h = global i16 1, align 1, section "foo"
; Check signed comparison.
define i32 @f1(i32 %src1) {
@@ -79,3 +80,23 @@ exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res
}
+
+; Repeat f1 with an unaligned address.
+define i32 @f5(i32 %src1) {
+; CHECK: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: ch %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %val = load i16 *@h, align 1
+ %src2 = sext i16 %val to i32
+ %cond = icmp slt i32 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i32 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-37.ll b/test/CodeGen/SystemZ/int-cmp-37.ll
index 862cf38458..272df71313 100644
--- a/test/CodeGen/SystemZ/int-cmp-37.ll
+++ b/test/CodeGen/SystemZ/int-cmp-37.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1
+@h = global i16 1, align 1, section "foo"
; Check unsigned comparison.
define i32 @f1(i32 %src1) {
@@ -79,3 +80,24 @@ exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res
}
+
+; Repeat f1 with an unaligned address.
+define i32 @f5(i32 %src1) {
+; CHECK: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: llh [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: clr %r2, [[VAL]]
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %val = load i16 *@h, align 1
+ %src2 = zext i16 %val to i32
+ %cond = icmp ult i32 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i32 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-38.ll b/test/CodeGen/SystemZ/int-cmp-38.ll
index 3e1ebfb558..54f325e674 100644
--- a/test/CodeGen/SystemZ/int-cmp-38.ll
+++ b/test/CodeGen/SystemZ/int-cmp-38.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i32 1
+@h = global i32 1, align 2, section "foo"
; Check signed comparisons.
define i32 @f1(i32 %src1) {
@@ -76,3 +77,41 @@ exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res
}
+
+; Repeat f1 with an unaligned address.
+define i32 @f5(i32 %src1) {
+; CHECK: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: c %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %src2 = load i32 *@h, align 2
+ %cond = icmp slt i32 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i32 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i32 %res
+}
+
+; Repeat f2 with an unaligned address.
+define i32 @f6(i32 %src1) {
+; CHECK: f6:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: cl %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %src2 = load i32 *@h, align 2
+ %cond = icmp ult i32 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i32 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-39.ll b/test/CodeGen/SystemZ/int-cmp-39.ll
index 7d654ee594..e99b240af8 100644
--- a/test/CodeGen/SystemZ/int-cmp-39.ll
+++ b/test/CodeGen/SystemZ/int-cmp-39.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1
+@h = global i16 1, align 1, section "foo"
; Check signed comparison.
define i64 @f1(i64 %src1) {
@@ -79,3 +80,23 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: cgh %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %val = load i16 *@h, align 1
+ %src2 = sext i16 %val to i64
+ %cond = icmp slt i64 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i64 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-40.ll b/test/CodeGen/SystemZ/int-cmp-40.ll
index 6c179ccf89..2d33c8fcd5 100644
--- a/test/CodeGen/SystemZ/int-cmp-40.ll
+++ b/test/CodeGen/SystemZ/int-cmp-40.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1
+@h = global i16 1, align 1, section "foo"
; Check unsigned comparison.
define i64 @f1(i64 %src1) {
@@ -79,3 +80,24 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
+; CHECK: llgh [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: clgr %r2, [[VAL]]
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %val = load i16 *@h, align 1
+ %src2 = zext i16 %val to i64
+ %cond = icmp ult i64 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i64 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-41.ll b/test/CodeGen/SystemZ/int-cmp-41.ll
index e2bdcad440..f68638a1b8 100644
--- a/test/CodeGen/SystemZ/int-cmp-41.ll
+++ b/test/CodeGen/SystemZ/int-cmp-41.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i32 1
+@h = global i32 1, align 2, section "foo"
; Check signed comparison.
define i64 @f1(i64 %src1) {
@@ -79,3 +80,23 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: cgf %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %val = load i32 *@h, align 2
+ %src2 = sext i32 %val to i64
+ %cond = icmp slt i64 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i64 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-42.ll b/test/CodeGen/SystemZ/int-cmp-42.ll
index dbdf91d291..dd3cb4a398 100644
--- a/test/CodeGen/SystemZ/int-cmp-42.ll
+++ b/test/CodeGen/SystemZ/int-cmp-42.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i32 1
+@h = global i32 1, align 2, section "foo"
; Check unsigned comparison.
define i64 @f1(i64 %src1) {
@@ -79,3 +80,23 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: clgf %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %val = load i32 *@h, align 2
+ %src2 = zext i32 %val to i64
+ %cond = icmp ult i64 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i64 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-43.ll b/test/CodeGen/SystemZ/int-cmp-43.ll
index 3df25fa3db..7d4adcab06 100644
--- a/test/CodeGen/SystemZ/int-cmp-43.ll
+++ b/test/CodeGen/SystemZ/int-cmp-43.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i64 1
+@h = global i64 1, align 4, section "foo"
; Check signed comparisons.
define i64 @f1(i64 %src1) {
@@ -76,3 +77,22 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}
+
+; Repeat f1 with an unaligned address.
+define i64 @f5(i64 %src1) {
+; CHECK: f5:
+; CHECK: larl [[REG:%r[0-5]]], h
+; CHECK: cg %r2, 0([[REG]])
+; CHECK-NEXT: jl
+; CHECK: br %r14
+entry:
+ %src2 = load i64 *@h, align 4
+ %cond = icmp slt i64 %src1, %src2
+ br i1 %cond, label %exit, label %mulb
+mulb:
+ %mul = mul i64 %src1, %src1
+ br label %exit
+exit:
+ %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-move-08.ll b/test/CodeGen/SystemZ/int-move-08.ll
index 5640fec329..e6022aa6ca 100644
--- a/test/CodeGen/SystemZ/int-move-08.ll
+++ b/test/CodeGen/SystemZ/int-move-08.ll
@@ -6,6 +6,10 @@
@gsrc32 = global i32 1
@gdst16 = global i16 2
@gdst32 = global i32 2
+@gsrc16u = global i16 1, align 1, section "foo"
+@gsrc32u = global i32 1, align 2, section "foo"
+@gdst16u = global i16 2, align 1, section "foo"
+@gdst32u = global i32 2, align 2, section "foo"
; Check sign-extending loads from i16.
define i32 @f1() {
@@ -47,3 +51,49 @@ define void @f4() {
store i32 %val, i32 *@gdst32
ret void
}
+
+; Repeat f1 with an unaligned variable.
+define i32 @f5() {
+; CHECK: f5:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
+; CHECK: lh %r2, 0([[REG]])
+; CHECK: br %r14
+ %val = load i16 *@gsrc16u, align 1
+ %ext = sext i16 %val to i32
+ ret i32 %ext
+}
+
+; Repeat f2 with an unaligned variable.
+define i32 @f6() {
+; CHECK: f6:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
+; CHECK: llh %r2, 0([[REG]])
+; CHECK: br %r14
+ %val = load i16 *@gsrc16u, align 1
+ %ext = zext i16 %val to i32
+ ret i32 %ext
+}
+
+; Repeat f3 with an unaligned variable.
+define void @f7(i32 %val) {
+; CHECK: f7:
+; CHECK: lgrl [[REG:%r[0-5]]], gdst16u
+; CHECK: sth %r2, 0([[REG]])
+; CHECK: br %r14
+ %half = trunc i32 %val to i16
+ store i16 %half, i16 *@gdst16u, align 1
+ ret void
+}
+
+; Repeat f4 with unaligned variables.
+define void @f8() {
+; CHECK: f8:
+; CHECK: larl [[REG:%r[0-5]]], gsrc32u
+; CHECK: l [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: larl [[REG:%r[0-5]]], gdst32u
+; CHECK: st [[VAL]], 0([[REG]])
+; CHECK: br %r14
+ %val = load i32 *@gsrc32u, align 2
+ store i32 %val, i32 *@gdst32u, align 2
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/int-move-09.ll b/test/CodeGen/SystemZ/int-move-09.ll
index a7a8c82951..9167405aa9 100644
--- a/test/CodeGen/SystemZ/int-move-09.ll
+++ b/test/CodeGen/SystemZ/int-move-09.ll
@@ -8,6 +8,12 @@
@gdst16 = global i16 2
@gdst32 = global i32 2
@gdst64 = global i64 2
+@gsrc16u = global i16 1, align 1, section "foo"
+@gsrc32u = global i32 1, align 2, section "foo"
+@gsrc64u = global i64 1, align 4, section "foo"
+@gdst16u = global i16 2, align 1, section "foo"
+@gdst32u = global i32 2, align 2, section "foo"
+@gdst64u = global i64 2, align 4, section "foo"
; Check sign-extending loads from i16.
define i64 @f1() {
@@ -79,3 +85,82 @@ define void @f7() {
store i64 %val, i64 *@gdst64
ret void
}
+
+; Repeat f1 with an unaligned variable.
+define i64 @f8() {
+; CHECK: f8:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
+; CHECK: lgh %r2, 0([[REG]])
+; CHECK: br %r14
+ %val = load i16 *@gsrc16u, align 1
+ %ext = sext i16 %val to i64
+ ret i64 %ext
+}
+
+; Repeat f2 with an unaligned variable.
+define i64 @f9() {
+; CHECK: f9:
+; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
+; CHECK: llgh %r2, 0([[REG]])
+; CHECK: br %r14
+ %val = load i16 *@gsrc16u, align 1
+ %ext = zext i16 %val to i64
+ ret i64 %ext
+}
+
+; Repeat f3 with an unaligned variable.
+define i64 @f10() {
+; CHECK: f10:
+; CHECK: larl [[REG:%r[0-5]]], gsrc32u
+; CHECK: lgf %r2, 0([[REG]])
+; CHECK: br %r14
+ %val = load i32 *@gsrc32u, align 2
+ %ext = sext i32 %val to i64
+ ret i64 %ext
+}
+
+; Repeat f4 with an unaligned variable.
+define i64 @f11() {
+; CHECK: f11:
+; CHECK: larl [[REG:%r[0-5]]], gsrc32u
+; CHECK: llgf %r2, 0([[REG]])
+; CHECK: br %r14
+ %val = load i32 *@gsrc32u, align 2
+ %ext = zext i32 %val to i64
+ ret i64 %ext
+}
+
+; Repeat f5 with an unaligned variable.
+define void @f12(i64 %val) {
+; CHECK: f12:
+; CHECK: lgrl [[REG:%r[0-5]]], gdst16u@GOT
+; CHECK: sth %r2, 0([[REG]])
+; CHECK: br %r14
+ %half = trunc i64 %val to i16
+ store i16 %half, i16 *@gdst16u, align 1
+ ret void
+}
+
+; Repeat f6 with an unaligned variable.
+define void @f13(i64 %val) {
+; CHECK: f13:
+; CHECK: larl [[REG:%r[0-5]]], gdst32u
+; CHECK: st %r2, 0([[REG]])
+; CHECK: br %r14
+ %word = trunc i64 %val to i32
+ store i32 %word, i32 *@gdst32u, align 2
+ ret void
+}
+
+; Repeat f7 with unaligned variables.
+define void @f14() {
+; CHECK: f14:
+; CHECK: larl [[REG:%r[0-5]]], gsrc64u
+; CHECK: lg [[VAL:%r[0-5]]], 0([[REG]])
+; CHECK: larl [[REG:%r[0-5]]], gdst64u
+; CHECK: stg [[VAL]], 0([[REG]])
+; CHECK: br %r14
+ %val = load i64 *@gsrc64u, align 4
+ store i64 %val, i64 *@gdst64u, align 4
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/unaligned-01.ll b/test/CodeGen/SystemZ/unaligned-01.ll
new file mode 100644
index 0000000000..be237acd27
--- /dev/null
+++ b/test/CodeGen/SystemZ/unaligned-01.ll
@@ -0,0 +1,59 @@
+; Check that unaligned accesses are allowed in general. We check the
+; few exceptions (like CRL) in their respective test files.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check that these four byte stores become a single word store.
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: iilf [[REG:%r[0-5]]], 66051
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %off1 = getelementptr i8 *%ptr, i64 1
+ %off2 = getelementptr i8 *%ptr, i64 2
+ %off3 = getelementptr i8 *%ptr, i64 3
+ store i8 0, i8 *%ptr
+ store i8 1, i8 *%off1
+ store i8 2, i8 *%off2
+ store i8 3, i8 *%off3
+ ret void
+}
+
+; Check that unaligned 2-byte accesses are allowed.
+define i16 @f2(i16 *%src, i16 *%dst) {
+; CHECK: f2:
+; CHECK: lh %r2, 0(%r2)
+; CHECK: sth %r2, 0(%r3)
+; CHECK: br %r14
+ %val = load i16 *%src, align 1
+ store i16 %val, i16 *%dst, align 1
+ ret i16 %val
+}
+
+; Check that unaligned 4-byte accesses are allowed.
+define i32 @f3(i32 *%src1, i32 *%src2, i32 *%dst) {
+; CHECK: f3:
+; CHECK: l %r2, 0(%r2)
+; CHECK: s %r2, 0(%r3)
+; CHECK: st %r2, 0(%r4)
+; CHECK: br %r14
+ %val1 = load i32 *%src1, align 1
+ %val2 = load i32 *%src2, align 2
+ %sub = sub i32 %val1, %val2
+ store i32 %sub, i32 *%dst, align 1
+ ret i32 %sub
+}
+
+; Check that unaligned 8-byte accesses are allowed.
+define i64 @f4(i64 *%src1, i64 *%src2, i64 *%dst) {
+; CHECK: f4:
+; CHECK: lg %r2, 0(%r2)
+; CHECK: sg %r2, 0(%r3)
+; CHECK: stg %r2, 0(%r4)
+; CHECK: br %r14
+ %val1 = load i64 *%src1, align 1
+ %val2 = load i64 *%src2, align 2
+ %sub = sub i64 %val1, %val2
+ store i64 %sub, i64 *%dst, align 4
+ ret i64 %sub
+}