internal/bytealg: move compare functions to bytealg

Move bytes.Compare and runtime·cmpstring to bytealg. Update #19792 Change-Id: I139e6d7c59686bef7a3017e3dec99eba5fd10447 Reviewed-on: https://go-review.googlesource.com/98515 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
author: Keith Randall <khr@golang.org> 2018-03-03 14:24:54 -0800
committer: Keith Randall <khr@golang.org> 2018-03-04 17:49:39 +0000
commit: f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (patch)
tree: f09ef9174bee3ae86920a113318f4a322a1a98ad /src/runtime/asm_amd64.s
parent: 45964e4f9c950863adcaeb62fbe49f3fa913f27d (diff)
download: go-git-f6332bb84ad87e958290ae23b29a2b13a41ee2a2.tar.gz
1 files changed, 0 insertions, 222 deletions
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 386307afa5..f91a01da72 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1358,228 +1358,6 @@ DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
 GLOBL shifts<>(SB),RODATA,$256
 
-TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
-	MOVQ	s1_base+0(FP), SI
-	MOVQ	s1_len+8(FP), BX
-	MOVQ	s2_base+16(FP), DI
-	MOVQ	s2_len+24(FP), DX
-	LEAQ	ret+32(FP), R9
-	JMP	runtime·cmpbody(SB)
-
-TEXT bytes·Compare(SB),NOSPLIT,$0-56
-	MOVQ	s1+0(FP), SI
-	MOVQ	s1+8(FP), BX
-	MOVQ	s2+24(FP), DI
-	MOVQ	s2+32(FP), DX
-	LEAQ	res+48(FP), R9
-	JMP	runtime·cmpbody(SB)
-
-// input:
-//   SI = a
-//   DI = b
-//   BX = alen
-//   DX = blen
-//   R9 = address of output word (stores -1/0/1 here)
-TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
-	CMPQ	SI, DI
-	JEQ	allsame
-	CMPQ	BX, DX
-	MOVQ	DX, R8
-	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
-	CMPQ	R8, $8
-	JB	small
-
-	CMPQ	R8, $63
-	JBE	loop
-	CMPB    runtime·support_avx2(SB), $1
-	JEQ     big_loop_avx2
-	JMP	big_loop
-loop:
-	CMPQ	R8, $16
-	JBE	_0through16
-	MOVOU	(SI), X0
-	MOVOU	(DI), X1
-	PCMPEQB X0, X1
-	PMOVMSKB X1, AX
-	XORQ	$0xffff, AX	// convert EQ to NE
-	JNE	diff16	// branch if at least one byte is not equal
-	ADDQ	$16, SI
-	ADDQ	$16, DI
-	SUBQ	$16, R8
-	JMP	loop
-	
-diff64:
-	ADDQ	$48, SI
-	ADDQ	$48, DI
-	JMP	diff16
-diff48:
-	ADDQ	$32, SI
-	ADDQ	$32, DI
-	JMP	diff16
-diff32:
-	ADDQ	$16, SI
-	ADDQ	$16, DI
-	// AX = bit mask of differences
-diff16:
-	BSFQ	AX, BX	// index of first byte that differs
-	XORQ	AX, AX
-	MOVB	(SI)(BX*1), CX
-	CMPB	CX, (DI)(BX*1)
-	SETHI	AX
-	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
-	MOVQ	AX, (R9)
-	RET
-
-	// 0 through 16 bytes left, alen>=8, blen>=8
-_0through16:
-	CMPQ	R8, $8
-	JBE	_0through8
-	MOVQ	(SI), AX
-	MOVQ	(DI), CX
-	CMPQ	AX, CX
-	JNE	diff8
-_0through8:
-	MOVQ	-8(SI)(R8*1), AX
-	MOVQ	-8(DI)(R8*1), CX
-	CMPQ	AX, CX
-	JEQ	allsame
-
-	// AX and CX contain parts of a and b that differ.
-diff8:
-	BSWAPQ	AX	// reverse order of bytes
-	BSWAPQ	CX
-	XORQ	AX, CX
-	BSRQ	CX, CX	// index of highest bit difference
-	SHRQ	CX, AX	// move a's bit to bottom
-	ANDQ	$1, AX	// mask bit
-	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
-	MOVQ	AX, (R9)
-	RET
-
-	// 0-7 bytes in common
-small:
-	LEAQ	(R8*8), CX	// bytes left -> bits left
-	NEGQ	CX		//  - bits lift (== 64 - bits left mod 64)
-	JEQ	allsame
-
-	// load bytes of a into high bytes of AX
-	CMPB	SI, $0xf8
-	JA	si_high
-	MOVQ	(SI), SI
-	JMP	si_finish
-si_high:
-	MOVQ	-8(SI)(R8*1), SI
-	SHRQ	CX, SI
-si_finish:
-	SHLQ	CX, SI
-
-	// load bytes of b in to high bytes of BX
-	CMPB	DI, $0xf8
-	JA	di_high
-	MOVQ	(DI), DI
-	JMP	di_finish
-di_high:
-	MOVQ	-8(DI)(R8*1), DI
-	SHRQ	CX, DI
-di_finish:
-	SHLQ	CX, DI
-
-	BSWAPQ	SI	// reverse order of bytes
-	BSWAPQ	DI
-	XORQ	SI, DI	// find bit differences
-	JEQ	allsame
-	BSRQ	DI, CX	// index of highest bit difference
-	SHRQ	CX, SI	// move a's bit to bottom
-	ANDQ	$1, SI	// mask bit
-	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
-	MOVQ	AX, (R9)
-	RET
-
-allsame:
-	XORQ	AX, AX
-	XORQ	CX, CX
-	CMPQ	BX, DX
-	SETGT	AX	// 1 if alen > blen
-	SETEQ	CX	// 1 if alen == blen
-	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
-	MOVQ	AX, (R9)
-	RET
-
-	// this works for >= 64 bytes of data.
-big_loop:
-	MOVOU	(SI), X0
-	MOVOU	(DI), X1
-	PCMPEQB X0, X1
-	PMOVMSKB X1, AX
-	XORQ	$0xffff, AX
-	JNE	diff16
-
-	MOVOU	16(SI), X0
-	MOVOU	16(DI), X1
-	PCMPEQB X0, X1
-	PMOVMSKB X1, AX
-	XORQ	$0xffff, AX
-	JNE	diff32
-
-	MOVOU	32(SI), X0
-	MOVOU	32(DI), X1
-	PCMPEQB X0, X1
-	PMOVMSKB X1, AX
-	XORQ	$0xffff, AX
-	JNE	diff48
-
-	MOVOU	48(SI), X0
-	MOVOU	48(DI), X1
-	PCMPEQB X0, X1
-	PMOVMSKB X1, AX
-	XORQ	$0xffff, AX
-	JNE	diff64
-
-	ADDQ	$64, SI
-	ADDQ	$64, DI
-	SUBQ	$64, R8
-	CMPQ	R8, $64
-	JBE	loop
-	JMP	big_loop
-
-	// Compare 64-bytes per loop iteration.
-	// Loop is unrolled and uses AVX2.
-big_loop_avx2:
-	VMOVDQU	(SI), Y2
-	VMOVDQU	(DI), Y3
-	VMOVDQU	32(SI), Y4
-	VMOVDQU	32(DI), Y5
-	VPCMPEQB Y2, Y3, Y0
-	VPMOVMSKB Y0, AX
-	XORL	$0xffffffff, AX
-	JNE	diff32_avx2
-	VPCMPEQB Y4, Y5, Y6
-	VPMOVMSKB Y6, AX
-	XORL	$0xffffffff, AX
-	JNE	diff64_avx2
-
-	ADDQ	$64, SI
-	ADDQ	$64, DI
-	SUBQ	$64, R8
-	CMPQ	R8, $64
-	JB	big_loop_avx2_exit
-	JMP	big_loop_avx2
-
-	// Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk.
-diff32_avx2:
-	VZEROUPPER
-	JMP diff16
-
-	// Same as diff32_avx2, but for last 32 bytes.
-diff64_avx2:
-	VZEROUPPER
-	JMP diff48
-
-	// For <64 bytes remainder jump to normal loop.
-big_loop_avx2_exit:
-	VZEROUPPER
-	JMP loop
-
 TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
 	MOVQ s+0(FP), DI
 	// We want len in DX and AX, because PCMPESTRI implicitly consumes them
author	Keith Randall <khr@golang.org>	2018-03-03 14:24:54 -0800
committer	Keith Randall <khr@golang.org>	2018-03-04 17:49:39 +0000
commit	f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (patch)
tree	f09ef9174bee3ae86920a113318f4a322a1a98ad /src/runtime/asm_amd64.s
parent	45964e4f9c950863adcaeb62fbe49f3fa913f27d (diff)
download	go-git-f6332bb84ad87e958290ae23b29a2b13a41ee2a2.tar.gz