summaryrefslogtreecommitdiff
path: root/src/runtime/asm_amd64.s
diff options
context:
space:
mode:
author Keith Randall <khr@golang.org> 2018-03-03 14:24:54 -0800
committer Keith Randall <khr@golang.org> 2018-03-04 17:49:39 +0000
commit f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (patch)
tree f09ef9174bee3ae86920a113318f4a322a1a98ad /src/runtime/asm_amd64.s
parent 45964e4f9c950863adcaeb62fbe49f3fa913f27d (diff)
download go-git-f6332bb84ad87e958290ae23b29a2b13a41ee2a2.tar.gz
internal/bytealg: move compare functions to bytealg
Move bytes.Compare and runtime·cmpstring to bytealg.

Update #19792

Change-Id: I139e6d7c59686bef7a3017e3dec99eba5fd10447
Reviewed-on: https://go-review.googlesource.com/98515
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/runtime/asm_amd64.s')
-rw-r--r-- src/runtime/asm_amd64.s 222
1 files changed, 0 insertions, 222 deletions
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 386307afa5..f91a01da72 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1358,228 +1358,6 @@ DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
-TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 // string compare entry: loads both (base, len) pairs into registers and tail-jumps to cmpbody
- MOVQ s1_base+0(FP), SI // SI = pointer to s1's bytes
- MOVQ s1_len+8(FP), BX // BX = length of s1
- MOVQ s2_base+16(FP), DI // DI = pointer to s2's bytes
- MOVQ s2_len+24(FP), DX // DX = length of s2
- LEAQ ret+32(FP), R9 // R9 = address of the result slot; cmpbody stores -1/0/+1 through it
- JMP runtime·cmpbody(SB) // tail jump: cmpbody writes the result and returns to our caller
-
-TEXT bytes·Compare(SB),NOSPLIT,$0-56 // byte-slice compare entry: same register setup as cmpstring, but each argument occupies three words
- MOVQ s1+0(FP), SI // SI = first word of s1 (data pointer)
- MOVQ s1+8(FP), BX // BX = second word of s1 (length)
- MOVQ s2+24(FP), DI // DI = first word of s2 (data pointer); the word at +16 is not read (presumably s1's capacity)
- MOVQ s2+32(FP), DX // DX = second word of s2 (length)
- LEAQ res+48(FP), R9 // R9 = address of the result slot; the word at +40 is not read (presumably s2's capacity)
- JMP runtime·cmpbody(SB) // tail jump: cmpbody writes the result and returns to our caller
-
-// cmpbody: three-way lexicographic comparison of byte sequences a and b. Input:
-// SI = a (pointer to first byte)
-// DI = b (pointer to first byte)
-// BX = alen
-// DX = blen
-// R9 = address of output word (stores -1/0/1 here for a<b / a==b / a>b)
-TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
- CMPQ SI, DI
- JEQ allsame // same start address: contents match, only the lengths can decide
- CMPQ BX, DX
- MOVQ DX, R8
- CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
- CMPQ R8, $8
- JB small // fewer than 8 common bytes
-
- CMPQ R8, $63
- JBE loop // 8..63 common bytes: 16-bytes-at-a-time loop
- CMPB runtime·support_avx2(SB), $1
- JEQ big_loop_avx2 // 64+ bytes and the AVX2 flag is set
- JMP big_loop // 64+ bytes, SSE-only path
-loop:
- CMPQ R8, $16
- JBE _0through16 // at most 16 bytes left: finish with 8-byte compares
- MOVOU (SI), X0
- MOVOU (DI), X1
- PCMPEQB X0, X1 // each byte of X1 = 0xff where a and b agree, 0x00 where they differ
- PMOVMSKB X1, AX // AX bit i = 1 if byte i is equal
- XORQ $0xffff, AX // convert EQ to NE
- JNE diff16 // branch if at least one byte is not equal
- ADDQ $16, SI
- ADDQ $16, DI
- SUBQ $16, R8
- JMP loop
-
-diff64: // difference is in the 4th 16-byte chunk: advance to it, then share diff16
- ADDQ $48, SI
- ADDQ $48, DI
- JMP diff16
-diff48: // difference is at offset 32 or later (3rd chunk, or 2nd half of an AVX2 block)
- ADDQ $32, SI
- ADDQ $32, DI
- JMP diff16
-diff32: // difference is in the 2nd 16-byte chunk; falls through to diff16
- ADDQ $16, SI
- ADDQ $16, DI
- // AX = bit mask of differences
-diff16:
- BSFQ AX, BX // index of first byte that differs
- XORQ AX, AX
- MOVB (SI)(BX*1), CX
- CMPB CX, (DI)(BX*1)
- SETHI AX // AX = 1 if a's byte is above b's byte (unsigned), else 0
- LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
- MOVQ AX, (R9)
- RET
-
- // 0 through 16 bytes left, alen>=8, blen>=8
-_0through16:
- CMPQ R8, $8
- JBE _0through8
- MOVQ (SI), AX
- MOVQ (DI), CX
- CMPQ AX, CX
- JNE diff8
-_0through8:
- MOVQ -8(SI)(R8*1), AX // last 8 bytes of the common region; may overlap bytes already compared
- MOVQ -8(DI)(R8*1), CX
- CMPQ AX, CX
- JEQ allsame // every common byte matched: the lengths decide
-
- // AX and CX contain parts of a and b that differ.
-diff8:
- BSWAPQ AX // reverse order of bytes
- BSWAPQ CX
- XORQ AX, CX // CX = bits where a and b differ
- BSRQ CX, CX // index of highest bit difference
- SHRQ CX, AX // move a's bit to bottom
- ANDQ $1, AX // mask bit
- LEAQ -1(AX*2), AX // 1/0 => +1/-1
- MOVQ AX, (R9)
- RET
-
- // 0-7 bytes in common
-small:
- LEAQ (R8*8), CX // bytes left -> bits left
- NEGQ CX // - bits left (== 64 - bits left mod 64)
- JEQ allsame // zero common bytes: the lengths decide
-
- // load bytes of a into high bytes of SI
- CMPB SI, $0xf8
- JA si_high // low address byte > 0xf8: an 8-byte load at (SI) would cross a 256-byte boundary (presumably avoided so the load cannot touch the next page)
- MOVQ (SI), SI // a's bytes land in the low bytes, unrelated bytes above them
- JMP si_finish
-si_high:
- MOVQ -8(SI)(R8*1), SI // load the 8 bytes ending at a's last common byte instead
- SHRQ CX, SI // shift a's bytes down to the low end, as in the direct load
-si_finish:
- SHLQ CX, SI // a's bytes now occupy the high bytes; low bytes are zero
-
- // load bytes of b into high bytes of DI
- CMPB DI, $0xf8
- JA di_high // same boundary check as for a
- MOVQ (DI), DI
- JMP di_finish
-di_high:
- MOVQ -8(DI)(R8*1), DI
- SHRQ CX, DI
-di_finish:
- SHLQ CX, DI
-
- BSWAPQ SI // reverse order of bytes
- BSWAPQ DI
- XORQ SI, DI // find bit differences
- JEQ allsame // common bytes identical: the lengths decide
- BSRQ DI, CX // index of highest bit difference
- SHRQ CX, SI // move a's bit to bottom
- ANDQ $1, SI // mask bit
- LEAQ -1(SI*2), AX // 1/0 => +1/-1
- MOVQ AX, (R9)
- RET
-
-allsame: // all compared bytes are equal; order by length
- XORQ AX, AX // zero both registers before CMPQ: XORQ would overwrite the flags
- XORQ CX, CX
- CMPQ BX, DX
- SETGT AX // 1 if alen > blen
- SETEQ CX // 1 if alen == blen
- LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
- MOVQ AX, (R9)
- RET
-
- // this works for >= 64 bytes of data.
-big_loop:
- MOVOU (SI), X0
- MOVOU (DI), X1
- PCMPEQB X0, X1
- PMOVMSKB X1, AX
- XORQ $0xffff, AX // AX bit i = 1 where byte i differs (same test as in loop)
- JNE diff16
-
- MOVOU 16(SI), X0
- MOVOU 16(DI), X1
- PCMPEQB X0, X1
- PMOVMSKB X1, AX
- XORQ $0xffff, AX
- JNE diff32
-
- MOVOU 32(SI), X0
- MOVOU 32(DI), X1
- PCMPEQB X0, X1
- PMOVMSKB X1, AX
- XORQ $0xffff, AX
- JNE diff48
-
- MOVOU 48(SI), X0
- MOVOU 48(DI), X1
- PCMPEQB X0, X1
- PMOVMSKB X1, AX
- XORQ $0xffff, AX
- JNE diff64
-
- ADDQ $64, SI
- ADDQ $64, DI
- SUBQ $64, R8
- CMPQ R8, $64
- JBE loop // 64 or fewer bytes left: finish in the 16-byte loop
- JMP big_loop
-
- // Compare 64-bytes per loop iteration.
- // Loop is unrolled and uses AVX2.
-big_loop_avx2:
- VMOVDQU (SI), Y2
- VMOVDQU (DI), Y3
- VMOVDQU 32(SI), Y4
- VMOVDQU 32(DI), Y5
- VPCMPEQB Y2, Y3, Y0
- VPMOVMSKB Y0, AX // 32-bit mask, bit i = 1 if byte i of the first 32 bytes is equal
- XORL $0xffffffff, AX // flip: 1 bits now mark differing bytes
- JNE diff32_avx2
- VPCMPEQB Y4, Y5, Y6
- VPMOVMSKB Y6, AX // same mask for bytes 32..63
- XORL $0xffffffff, AX
- JNE diff64_avx2
-
- ADDQ $64, SI
- ADDQ $64, DI
- SUBQ $64, R8
- CMPQ R8, $64
- JB big_loop_avx2_exit // fewer than 64 bytes left
- JMP big_loop_avx2
-
- // Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk.
-diff32_avx2:
- VZEROUPPER
- JMP diff16 // AX holds a 32-bit mask; diff16 indexes from SI/DI unchanged
-
- // Same as diff32_avx2, but for last 32 bytes.
-diff64_avx2:
- VZEROUPPER
- JMP diff48 // diff48 adds 32 to SI/DI before sharing diff16
-
- // For <64 bytes remainder jump to normal loop.
-big_loop_avx2_exit:
- VZEROUPPER
- JMP loop
-
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
MOVQ s+0(FP), DI
// We want len in DX and AX, because PCMPESTRI implicitly consumes them