summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick Kledzik <kledzik@apple.com> 2013-05-24 19:38:11 +0000
committer Nick Kledzik <kledzik@apple.com> 2013-05-24 19:38:11 +0000
commit a09d09d29e250e905bdfaf819979b9c3e9adc047 (patch)
tree 953cf70a185259b3d6b191811e9c322f38457ef6
parent f5a9ace4904e9daf3d962274cbbcb702ebc5450e (diff)
download compiler-rt-a09d09d29e250e905bdfaf819979b9c3e9adc047.tar.gz
compiler-rt-a09d09d29e250e905bdfaf819979b9c3e9adc047.tar.bz2
compiler-rt-a09d09d29e250e905bdfaf819979b9c3e9adc047.tar.xz
<rdar://problem/12512722> Use arm divide instruction if available
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@182665 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/arm/divmodsi4.S 13
-rw-r--r-- lib/arm/modsi3.S 11
-rw-r--r-- lib/arm/udivmodsi4.S 13
-rw-r--r-- lib/arm/umodsi3.S 11
-rw-r--r-- test/timing/modsi3.c 52
5 files changed, 100 insertions, 0 deletions
diff --git a/lib/arm/divmodsi4.S b/lib/arm/divmodsi4.S
index cec39a79..d31e510c 100644
--- a/lib/arm/divmodsi4.S
+++ b/lib/arm/divmodsi4.S
@@ -24,6 +24,18 @@
.syntax unified
.align 3
DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
+#if __ARM_ARCH_7S__ // Fast path: signed divide + modulus via the sdiv instruction; the code after #else is used otherwise.
+ tst r1, r1 // Z is set when the divisor (r1) is zero
+ beq LOCAL_LABEL(divzero) // zero divisor: skip sdiv and take the divzero return
+ mov r3, r0 // keep the dividend in r3; r0 is about to receive the quotient
+ sdiv r0, r3, r1 // r0 = dividend / divisor (signed)
+ mls r1, r0, r1, r3 // r1 = dividend - quotient * divisor, i.e. the modulus
+ str r1, [r2] // store the modulus at the address held in r2
+ bx lr // return with the quotient in r0
+LOCAL_LABEL(divzero):
+ mov r0, #0 // zero divisor: return a quotient of 0; nothing is stored to [r2] on this path
+ bx lr
+#else
ESTABLISH_FRAME
// Set aside the sign of the quotient and modulus, and the address for the
// modulus.
@@ -45,3 +57,4 @@ DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
sub r1, r1, r5, asr #31
str r1, [r6]
CLEAR_FRAME_AND_RETURN
+#endif
diff --git a/lib/arm/modsi3.S b/lib/arm/modsi3.S
index a4cd2ee5..04595011 100644
--- a/lib/arm/modsi3.S
+++ b/lib/arm/modsi3.S
@@ -23,6 +23,16 @@
.syntax unified
.align 3
DEFINE_COMPILERRT_FUNCTION(__modsi3)
+#if __ARM_ARCH_7S__ // Fast path: signed modulus via the sdiv instruction; the code after #else is used otherwise.
+ tst r1, r1 // Z is set when the divisor (r1) is zero
+ beq LOCAL_LABEL(divzero) // zero divisor: skip sdiv and take the divzero return
+ sdiv r2, r0, r1 // r2 = dividend / divisor (signed); r2 is used as scratch
+ mls r0, r2, r1, r0 // r0 = dividend - quotient * divisor, i.e. the modulus
+ bx lr // return with the modulus in r0
+LOCAL_LABEL(divzero):
+ mov r0, #0 // zero divisor: return 0
+ bx lr
+#else
ESTABLISH_FRAME
// Set aside the sign of the dividend.
mov r4, r0
@@ -37,3 +47,4 @@ DEFINE_COMPILERRT_FUNCTION(__modsi3)
eor r0, r0, r4, asr #31
sub r0, r0, r4, asr #31
CLEAR_FRAME_AND_RETURN
+#endif
diff --git a/lib/arm/udivmodsi4.S b/lib/arm/udivmodsi4.S
index d164a751..9956cd48 100644
--- a/lib/arm/udivmodsi4.S
+++ b/lib/arm/udivmodsi4.S
@@ -31,6 +31,18 @@
.syntax unified
.align 3
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
+#if __ARM_ARCH_7S__ // Fast path: unsigned divide + modulus via the udiv instruction; the digit-by-digit code after #else is used otherwise.
+ tst r1, r1 // Z is set when the divisor (r1) is zero
+ beq LOCAL_LABEL(divzero) // zero divisor: skip udiv and take the divzero return
+ mov r3, r0 // keep the dividend in r3; r0 is about to receive the quotient
+ udiv r0, r3, r1 // r0 = dividend / divisor (unsigned)
+ mls r1, r0, r1, r3 // r1 = dividend - quotient * divisor, i.e. the modulus
+ str r1, [r2] // store the modulus at the address held in r2
+ bx lr // return with the quotient in r0
+LOCAL_LABEL(divzero):
+ mov r0, #0 // zero divisor: return a quotient of 0; nothing is stored to [r2] on this path
+ bx lr
+#else
// We use a simple digit by digit algorithm; before we get into the actual
// divide loop, we must calculate the left-shift amount necessary to align
// the MSB of the divisor with that of the dividend (If this shift is
@@ -78,3 +90,4 @@ LOCAL_LABEL(return):
str a, [r2]
mov r0, q
CLEAR_FRAME_AND_RETURN
+#endif
diff --git a/lib/arm/umodsi3.S b/lib/arm/umodsi3.S
index 3a2ab2b8..328e7054 100644
--- a/lib/arm/umodsi3.S
+++ b/lib/arm/umodsi3.S
@@ -23,6 +23,16 @@
.syntax unified
.align 3
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+#if __ARM_ARCH_7S__ // Fast path: unsigned modulus via the udiv instruction; the digit-by-digit code after #else is used otherwise.
+ tst r1, r1 // Z is set when the divisor (r1) is zero
+ beq LOCAL_LABEL(divzero) // zero divisor: skip udiv and take the divzero return
+ udiv r2, r0, r1 // r2 = dividend / divisor (unsigned); r2 is used as scratch
+ mls r0, r2, r1, r0 // r0 = dividend - quotient * divisor, i.e. the modulus
+ bx lr // return with the modulus in r0
+LOCAL_LABEL(divzero):
+ mov r0, #0 // zero divisor: return 0
+ bx lr
+#else
// We use a simple digit by digit algorithm; before we get into the actual
// divide loop, we must calculate the left-shift amount necessary to align
// the MSB of the divisor with that of the dividend.
@@ -56,3 +66,4 @@ LOCAL_LABEL(mainLoop):
subs r, a, b
movhs a, r
bx lr
+#endif
diff --git a/test/timing/modsi3.c b/test/timing/modsi3.c
new file mode 100644
index 00000000..3275b832
--- /dev/null
+++ b/test/timing/modsi3.c
@@ -0,0 +1,52 @@
+#include "timing.h"
+#include <stdio.h>
+
+#define INPUT_TYPE int32_t
+#define INPUT_SIZE 256
+#define FUNCTION_NAME __modsi3
+
+#ifndef LIBNAME
+#define LIBNAME UNKNOWN
+#endif
+
+#define LIBSTRING LIBSTRINGX(LIBNAME)
+#define LIBSTRINGX(a) LIBSTRINGXX(a)
+#define LIBSTRINGXX(a) #a
+
+INPUT_TYPE FUNCTION_NAME(INPUT_TYPE input1, INPUT_TYPE input2);
+
+int main(int argc, char *argv[]) { // Times FUNCTION_NAME over INPUT_SIZE operand pairs and prints the best per-call figure.
+ INPUT_TYPE input1[INPUT_SIZE]; // first operand of each timed call
+ INPUT_TYPE input2[INPUT_SIZE]; // second operand of each timed call
+ int i, j;
+
+ srand(42); // constant seed, so every run draws the same rand() sequence
+
+ // Initialize the input array with data of various sizes.
+ for (i=0; i<INPUT_SIZE; ++i) {
+ input1[i] = rand();
+ input2[i] = rand() + 1; // +1 keeps a rand() result of 0 from becoming a zero divisor. NOTE(review): overflows if rand() can return INT_MAX -- confirm RAND_MAX on the target
+ }
+
+ int64_t fixedInput = INT64_C(0x1234567890ABCDEF); // NOTE(review): not referenced anywhere else in this function
+
+ double bestTime = __builtin_inf(); // starts at +inf so the first trial always replaces it
+ void *dummyp; // only ever assigned; the alloca() call below is made for its effect on the stack
+ for (j=0; j<1024; ++j) { // 1024 trials; the smallest measurement is kept
+
+ uint64_t startTime = mach_absolute_time();
+ for (i=0; i<INPUT_SIZE; ++i)
+ FUNCTION_NAME(input1[i], input2[i]); // return value is discarded; only the elapsed time is of interest
+ uint64_t endTime = mach_absolute_time();
+
+ double thisTime = intervalInCycles(startTime, endTime); // helper not defined in this file; presumably converts the timestamp delta to cycles -- TODO confirm in timing.h
+ bestTime = __builtin_fmin(thisTime, bestTime);
+
+ // Move the stack alignment between trials to eliminate (mostly) aliasing effects
+ dummyp = alloca(1);
+ }
+
+ printf("%16s: %f cycles.\n", LIBSTRING, bestTime / (double) INPUT_SIZE); // best trial divided by the number of calls in a trial
+
+ return 0;
+}