diff options
author | Keith Randall <khr@golang.org> | 2018-03-03 14:24:54 -0800 |
---|---|---|
committer | Keith Randall <khr@golang.org> | 2018-03-04 17:49:39 +0000 |
commit | f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (patch) | |
tree | f09ef9174bee3ae86920a113318f4a322a1a98ad /src/runtime/asm_amd64.s | |
parent | 45964e4f9c950863adcaeb62fbe49f3fa913f27d (diff) | |
download | go-git-f6332bb84ad87e958290ae23b29a2b13a41ee2a2.tar.gz |
internal/bytealg: move compare functions to bytealg
Move bytes.Compare and runtime·cmpstring to bytealg.
Update #19792
Change-Id: I139e6d7c59686bef7a3017e3dec99eba5fd10447
Reviewed-on: https://go-review.googlesource.com/98515
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/runtime/asm_amd64.s')
-rw-r--r-- | src/runtime/asm_amd64.s | 222 |
1 files changed, 0 insertions, 222 deletions
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 386307afa5..f91a01da72 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -1358,228 +1358,6 @@ DATA shifts<>+0xf0(SB)/8, $0x0807060504030201 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09 GLOBL shifts<>(SB),RODATA,$256 -TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 - MOVQ s1_base+0(FP), SI - MOVQ s1_len+8(FP), BX - MOVQ s2_base+16(FP), DI - MOVQ s2_len+24(FP), DX - LEAQ ret+32(FP), R9 - JMP runtime·cmpbody(SB) - -TEXT bytes·Compare(SB),NOSPLIT,$0-56 - MOVQ s1+0(FP), SI - MOVQ s1+8(FP), BX - MOVQ s2+24(FP), DI - MOVQ s2+32(FP), DX - LEAQ res+48(FP), R9 - JMP runtime·cmpbody(SB) - -// input: -// SI = a -// DI = b -// BX = alen -// DX = blen -// R9 = address of output word (stores -1/0/1 here) -TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 - CMPQ SI, DI - JEQ allsame - CMPQ BX, DX - MOVQ DX, R8 - CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare - CMPQ R8, $8 - JB small - - CMPQ R8, $63 - JBE loop - CMPB runtime·support_avx2(SB), $1 - JEQ big_loop_avx2 - JMP big_loop -loop: - CMPQ R8, $16 - JBE _0through16 - MOVOU (SI), X0 - MOVOU (DI), X1 - PCMPEQB X0, X1 - PMOVMSKB X1, AX - XORQ $0xffff, AX // convert EQ to NE - JNE diff16 // branch if at least one byte is not equal - ADDQ $16, SI - ADDQ $16, DI - SUBQ $16, R8 - JMP loop - -diff64: - ADDQ $48, SI - ADDQ $48, DI - JMP diff16 -diff48: - ADDQ $32, SI - ADDQ $32, DI - JMP diff16 -diff32: - ADDQ $16, SI - ADDQ $16, DI - // AX = bit mask of differences -diff16: - BSFQ AX, BX // index of first byte that differs - XORQ AX, AX - MOVB (SI)(BX*1), CX - CMPB CX, (DI)(BX*1) - SETHI AX - LEAQ -1(AX*2), AX // convert 1/0 to +1/-1 - MOVQ AX, (R9) - RET - - // 0 through 16 bytes left, alen>=8, blen>=8 -_0through16: - CMPQ R8, $8 - JBE _0through8 - MOVQ (SI), AX - MOVQ (DI), CX - CMPQ AX, CX - JNE diff8 -_0through8: - MOVQ -8(SI)(R8*1), AX - MOVQ -8(DI)(R8*1), CX - CMPQ AX, CX - JEQ allsame - - // AX and CX contain parts of a and b that differ. -diff8: - BSWAPQ AX // reverse order of bytes - BSWAPQ CX - XORQ AX, CX - BSRQ CX, CX // index of highest bit difference - SHRQ CX, AX // move a's bit to bottom - ANDQ $1, AX // mask bit - LEAQ -1(AX*2), AX // 1/0 => +1/-1 - MOVQ AX, (R9) - RET - - // 0-7 bytes in common -small: - LEAQ (R8*8), CX // bytes left -> bits left - NEGQ CX // - bits lift (== 64 - bits left mod 64) - JEQ allsame - - // load bytes of a into high bytes of AX - CMPB SI, $0xf8 - JA si_high - MOVQ (SI), SI - JMP si_finish -si_high: - MOVQ -8(SI)(R8*1), SI - SHRQ CX, SI -si_finish: - SHLQ CX, SI - - // load bytes of b in to high bytes of BX - CMPB DI, $0xf8 - JA di_high - MOVQ (DI), DI - JMP di_finish -di_high: - MOVQ -8(DI)(R8*1), DI - SHRQ CX, DI -di_finish: - SHLQ CX, DI - - BSWAPQ SI // reverse order of bytes - BSWAPQ DI - XORQ SI, DI // find bit differences - JEQ allsame - BSRQ DI, CX // index of highest bit difference - SHRQ CX, SI // move a's bit to bottom - ANDQ $1, SI // mask bit - LEAQ -1(SI*2), AX // 1/0 => +1/-1 - MOVQ AX, (R9) - RET - -allsame: - XORQ AX, AX - XORQ CX, CX - CMPQ BX, DX - SETGT AX // 1 if alen > blen - SETEQ CX // 1 if alen == blen - LEAQ -1(CX)(AX*2), AX // 1,0,-1 result - MOVQ AX, (R9) - RET - - // this works for >= 64 bytes of data. -big_loop: - MOVOU (SI), X0 - MOVOU (DI), X1 - PCMPEQB X0, X1 - PMOVMSKB X1, AX - XORQ $0xffff, AX - JNE diff16 - - MOVOU 16(SI), X0 - MOVOU 16(DI), X1 - PCMPEQB X0, X1 - PMOVMSKB X1, AX - XORQ $0xffff, AX - JNE diff32 - - MOVOU 32(SI), X0 - MOVOU 32(DI), X1 - PCMPEQB X0, X1 - PMOVMSKB X1, AX - XORQ $0xffff, AX - JNE diff48 - - MOVOU 48(SI), X0 - MOVOU 48(DI), X1 - PCMPEQB X0, X1 - PMOVMSKB X1, AX - XORQ $0xffff, AX - JNE diff64 - - ADDQ $64, SI - ADDQ $64, DI - SUBQ $64, R8 - CMPQ R8, $64 - JBE loop - JMP big_loop - - // Compare 64-bytes per loop iteration. - // Loop is unrolled and uses AVX2. -big_loop_avx2: - VMOVDQU (SI), Y2 - VMOVDQU (DI), Y3 - VMOVDQU 32(SI), Y4 - VMOVDQU 32(DI), Y5 - VPCMPEQB Y2, Y3, Y0 - VPMOVMSKB Y0, AX - XORL $0xffffffff, AX - JNE diff32_avx2 - VPCMPEQB Y4, Y5, Y6 - VPMOVMSKB Y6, AX - XORL $0xffffffff, AX - JNE diff64_avx2 - - ADDQ $64, SI - ADDQ $64, DI - SUBQ $64, R8 - CMPQ R8, $64 - JB big_loop_avx2_exit - JMP big_loop_avx2 - - // Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk. -diff32_avx2: - VZEROUPPER - JMP diff16 - - // Same as diff32_avx2, but for last 32 bytes. -diff64_avx2: - VZEROUPPER - JMP diff48 - - // For <64 bytes remainder jump to normal loop. -big_loop_avx2_exit: - VZEROUPPER - JMP loop - TEXT strings·indexShortStr(SB),NOSPLIT,$0-40 MOVQ s+0(FP), DI // We want len in DX and AX, because PCMPESTRI implicitly consumes them |