summaryrefslogtreecommitdiff
path: root/src/math
diff options
context:
space:
mode:
author: Ilya Tocar <ilya.tocar@intel.com> 2015-09-14 18:42:39 +0300
committer: Brad Fitzpatrick <bradfitz@golang.org> 2015-10-03 15:55:08 +0000
commit: 37cfb2e07e9e9e8b11f21ea462856aeb1f6ec0c0 (patch)
tree: 29c52602253546a41b3c2c062789ab573844c366 /src/math
parent: acc90c53e8b5448afee8455ee7c4917af25c6bc9 (diff)
download: go-git-37cfb2e07e9e9e8b11f21ea462856aeb1f6ec0c0.tar.gz
math: optimize ceil/floor functions on amd64
Use SSE 4.1 rounding instruction to perform rounding

Results (haswell):

name      old time/op  new time/op  delta
Floor-48  2.71ns ± 0%  1.87ns ± 1%  -31.17%  (p=0.000 n=16+19)
Ceil-48   3.09ns ± 3%  2.16ns ± 0%  -30.16%  (p=0.000 n=19+12)

Change-Id: If63715879eed6530b1eb4fc96132d827f8f43909
Reviewed-on: https://go-review.googlesource.com/14561
Reviewed-by: Klaus Post <klauspost@gmail.com>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/math')
-rw-r--r-- src/math/floor_amd64.s 23
-rw-r--r-- src/math/floor_asm.go 12
2 files changed, 35 insertions, 0 deletions
diff --git a/src/math/floor_amd64.s b/src/math/floor_amd64.s
index 67b7cdec04..7f512e7c22 100644
--- a/src/math/floor_amd64.s
+++ b/src/math/floor_amd64.s
@@ -6,8 +6,25 @@
#define Big 0x4330000000000000 // 2**52
+// func hasSSE4() bool
+// hasSSE4 reports whether SSE4.1 is supported, using bit 19 of CX after CPUID with AX=1.
+TEXT ·hasSSE4(SB),NOSPLIT,$0
+ XORQ AX, AX // AX = 0
+ INCL AX // AX = 1, the CPUID function number to query
+ CPUID // executes CPUID with AX=1; CX is read below
+ SHRQ $19, CX // move bit 19 (the SSE4.1 flag) down to bit 0
+ ANDQ $1, CX // keep only that bit, so CX is 0 or 1
+ MOVB CX, ret+0(FP) // store the low byte as the bool result
+ RET
+
// func Floor(x float64) float64
TEXT ·Floor(SB),NOSPLIT,$0
+ CMPB math·useSSE4(SB), $1
+ JNE nosse4
+ ROUNDSD $1, x+0(FP), X0
+ MOVQ X0, ret+8(FP)
+ RET
+nosse4:
MOVQ x+0(FP), AX
MOVQ $~(1<<63), DX // sign bit mask
ANDQ AX,DX // DX = |x|
@@ -30,6 +47,12 @@ isBig_floor:
// func Ceil(x float64) float64
TEXT ·Ceil(SB),NOSPLIT,$0
+ CMPB math·useSSE4(SB), $1
+ JNE nosse4
+ ROUNDSD $2, x+0(FP), X0
+ MOVQ X0, ret+8(FP)
+ RET
+nosse4:
MOVQ x+0(FP), AX
MOVQ $~(1<<63), DX // sign bit mask
MOVQ AX, BX // BX = copy of x
diff --git a/src/math/floor_asm.go b/src/math/floor_asm.go
new file mode 100644
index 0000000000..28e56a5d51
--- /dev/null
+++ b/src/math/floor_asm.go
@@ -0,0 +1,12 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+
+package math
+
+// hasSSE4 is implemented in floor_amd64.s; it reports whether SSE4.1 is supported.
+func hasSSE4() bool
+
+var useSSE4 = hasSSE4() // read by the assembly Floor and Ceil to select the ROUNDSD path