diff options
| author | Ilya Tocar <ilya.tocar@intel.com> | 2015-09-14 18:42:39 +0300 |
|---|---|---|
| committer | Brad Fitzpatrick <bradfitz@golang.org> | 2015-10-03 15:55:08 +0000 |
| commit | 37cfb2e07e9e9e8b11f21ea462856aeb1f6ec0c0 (patch) | |
| tree | 29c52602253546a41b3c2c062789ab573844c366 /src/math | |
| parent | acc90c53e8b5448afee8455ee7c4917af25c6bc9 (diff) | |
| download | go-git-37cfb2e07e9e9e8b11f21ea462856aeb1f6ec0c0.tar.gz | |
math: optimize ceil/floor functions on amd64
Use the SSE4.1 ROUNDSD instruction to perform rounding when the CPU supports it, falling back to the existing implementation otherwise.
Results (Haswell):

```
name      old time/op  new time/op  delta
Floor-48  2.71ns ± 0%  1.87ns ± 1%  -31.17%  (p=0.000 n=16+19)
Ceil-48   3.09ns ± 3%  2.16ns ± 0%  -30.16%  (p=0.000 n=19+12)
```
Change-Id: If63715879eed6530b1eb4fc96132d827f8f43909
Reviewed-on: https://go-review.googlesource.com/14561
Reviewed-by: Klaus Post <klauspost@gmail.com>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/math')
| -rw-r--r-- | src/math/floor_amd64.s | 23 | ||||
| -rw-r--r-- | src/math/floor_asm.go | 12 |
2 files changed, 35 insertions, 0 deletions
```diff
diff --git a/src/math/floor_amd64.s b/src/math/floor_amd64.s
index 67b7cdec04..7f512e7c22 100644
--- a/src/math/floor_amd64.s
+++ b/src/math/floor_amd64.s
@@ -6,8 +6,25 @@
 
 #define Big 0x4330000000000000 // 2**52
 
+// func hasSSE4() bool
+// returns whether SSE4.1 is supported
+TEXT ·hasSSE4(SB),NOSPLIT,$0
+	XORQ AX, AX
+	INCL AX
+	CPUID
+	SHRQ $19, CX
+	ANDQ $1, CX
+	MOVB CX, ret+0(FP)
+	RET
+
 // func Floor(x float64) float64
 TEXT ·Floor(SB),NOSPLIT,$0
+	CMPB math·useSSE4(SB), $1
+	JNE nosse4
+	ROUNDSD $1, x+0(FP), X0
+	MOVQ X0, ret+8(FP)
+	RET
+nosse4:
 	MOVQ x+0(FP), AX
 	MOVQ $~(1<<63), DX // sign bit mask
 	ANDQ AX,DX // DX = |x|
@@ -30,6 +47,12 @@ isBig_floor:
 
 // func Ceil(x float64) float64
 TEXT ·Ceil(SB),NOSPLIT,$0
+	CMPB math·useSSE4(SB), $1
+	JNE nosse4
+	ROUNDSD $2, x+0(FP), X0
+	MOVQ X0, ret+8(FP)
+	RET
+nosse4:
 	MOVQ x+0(FP), AX
 	MOVQ $~(1<<63), DX // sign bit mask
 	MOVQ AX, BX // BX = copy of x
diff --git a/src/math/floor_asm.go b/src/math/floor_asm.go
new file mode 100644
index 0000000000..28e56a5d51
--- /dev/null
+++ b/src/math/floor_asm.go
@@ -0,0 +1,12 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+
+package math
+
+//defined in floor_amd64.s
+func hasSSE4() bool
+
+var useSSE4 = hasSSE4()
```
