1 files changed, 379 insertions, 58 deletions
diff --git a/libgo/go/strings/replace_test.go b/libgo/go/strings/replace_test.go
index 23c7e2e5334..d33dea95b09 100644
--- a/libgo/go/strings/replace_test.go
+++ b/libgo/go/strings/replace_test.go
@@ -7,105 +7,390 @@ package strings_test
 import (
 	"bytes"
 	"fmt"
-	"log"
 	. "strings"
 	"testing"
 )
 
-var _ = log.Printf
-
-type ReplacerTest struct {
-	r   *Replacer
-	in  string
-	out string
-}
+var htmlEscaper = NewReplacer(
+	"&", "&amp;",
+	"<", "&lt;",
+	">", "&gt;",
+	`"`, "&quot;",
+	"'", "&apos;",
+)
 
-var htmlEscaper = NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", "\"", "&quot;")
+var htmlUnescaper = NewReplacer(
+	"&amp;", "&",
+	"&lt;", "<",
+	"&gt;", ">",
+	"&quot;", `"`,
+	"&apos;", "'",
+)
 
 // The http package's old HTML escaping function.
-func oldhtmlEscape(s string) string {
+func oldHTMLEscape(s string) string {
 	s = Replace(s, "&", "&amp;", -1)
 	s = Replace(s, "<", "&lt;", -1)
 	s = Replace(s, ">", "&gt;", -1)
-	s = Replace(s, "\"", "&quot;", -1)
+	s = Replace(s, `"`, "&quot;", -1)
 	s = Replace(s, "'", "&apos;", -1)
 	return s
 }
 
-var replacer = NewReplacer("aaa", "3[aaa]", "aa", "2[aa]", "a", "1[a]", "i", "i",
-	"longerst", "most long", "longer", "medium", "long", "short",
-	"X", "Y", "Y", "Z")
-
 var capitalLetters = NewReplacer("a", "A", "b", "B")
 
-var blankToXReplacer = NewReplacer("", "X", "o", "O")
+// TestReplacer tests the replacer implementations.
+func TestReplacer(t *testing.T) {
+	type testCase struct {
+		r       *Replacer
+		in, out string
+	}
+	var testCases []testCase
 
-var ReplacerTests = []ReplacerTest{
-	// byte->string
-	{htmlEscaper, "No changes", "No changes"},
-	{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
-	{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
+	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
+	str := func(b byte) string {
+		return string([]byte{b})
+	}
+	var s []string
 
-	// generic
-	{replacer, "fooaaabar", "foo3[aaa]b1[a]r"},
-	{replacer, "long, longerst, longer", "short, most long, medium"},
-	{replacer, "XiX", "YiY"},
+	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
+	s = nil
+	for i := 0; i < 256; i++ {
+		s = append(s, str(byte(i)), str(byte(i+1)))
+	}
+	inc := NewReplacer(s...)
 
-	// byte->byte
-	{capitalLetters, "brad", "BrAd"},
-	{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
+	// Test cases with 1-byte old strings, 1-byte new strings.
+	testCases = append(testCases,
+		testCase{capitalLetters, "brad", "BrAd"},
+		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
+		testCase{capitalLetters, "", ""},
 
-	// hitting "" special case
-	{blankToXReplacer, "oo", "XOXOX"},
-}
+		testCase{inc, "brad", "csbe"},
+		testCase{inc, "\x00\xff", "\x01\x00"},
+		testCase{inc, "", ""},
 
-func TestReplacer(t *testing.T) {
-	for i, tt := range ReplacerTests {
-		if s := tt.r.Replace(tt.in); s != tt.out {
-			t.Errorf("%d. Replace(%q) = %q, want %q", i, tt.in, s, tt.out)
+		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
+	)
+
+	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...; bytes below "a" map to themselves.
+	s = nil
+	for i := 0; i < 256; i++ {
+		n := i + 1 - 'a'
+		if n < 1 {
+			n = 1
+		}
+		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
+	}
+	repeat := NewReplacer(s...)
+
+	// Test cases with 1-byte old strings, variable length new strings.
+	testCases = append(testCases,
+		testCase{htmlEscaper, "No changes", "No changes"},
+		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
+		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
+		testCase{htmlEscaper, "", ""},
+
+		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
+		testCase{repeat, "abba", "abbbba"},
+		testCase{repeat, "", ""},
+
+		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
+	)
+
+	// The remaining test cases have variable length old strings.
+
+	testCases = append(testCases,
+		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
+		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
+		testCase{htmlUnescaper, "", ""},
+
+		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
+
+		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
+
+		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
+	)
+
+	// gen1 has multiple old strings of variable length. There is no
+	// overall non-empty common prefix, but some pairwise common prefixes.
+	gen1 := NewReplacer(
+		"aaa", "3[aaa]",
+		"aa", "2[aa]",
+		"a", "1[a]",
+		"i", "i",
+		"longerst", "most long",
+		"longer", "medium",
+		"long", "short",
+		"xx", "xx",
+		"x", "X",
+		"X", "Y",
+		"Y", "Z",
+	)
+	testCases = append(testCases,
+		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
+		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
+		testCase{gen1, "xxxxx", "xxxxX"},
+		testCase{gen1, "XiX", "YiY"},
+		testCase{gen1, "", ""},
+	)
+
+	// gen2 has multiple old strings with no pairwise common prefix.
+	gen2 := NewReplacer(
+		"roses", "red",
+		"violets", "blue",
+		"sugar", "sweet",
+	)
+	testCases = append(testCases,
+		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
+		testCase{gen2, "", ""},
+	)
+
+	// gen3 has multiple old strings with an overall common prefix.
+	gen3 := NewReplacer(
+		"abracadabra", "poof",
+		"abracadabrakazam", "splat",
+		"abraham", "lincoln",
+		"abrasion", "scrape",
+		"abraham", "isaac",
+	)
+	testCases = append(testCases,
+		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
+		testCase{gen3, "abrasion abracad", "scrape abracad"},
+		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
+		testCase{gen3, "", ""},
+	)
+
+	// foo{1,2,3,4} have multiple old strings with an overall common prefix
+	// and 1- or 2-byte extensions from the common prefix.
+	foo1 := NewReplacer(
+		"foo1", "A",
+		"foo2", "B",
+		"foo3", "C",
+	)
+	foo2 := NewReplacer(
+		"foo1", "A",
+		"foo2", "B",
+		"foo31", "C",
+		"foo32", "D",
+	)
+	foo3 := NewReplacer(
+		"foo11", "A",
+		"foo12", "B",
+		"foo31", "C",
+		"foo32", "D",
+	)
+	foo4 := NewReplacer(
+		"foo12", "B",
+		"foo32", "D",
+	)
+	testCases = append(testCases,
+		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
+		testCase{foo1, "", ""},
+
+		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
+		testCase{foo2, "", ""},
+
+		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
+		testCase{foo3, "", ""},
+
+		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
+		testCase{foo4, "", ""},
+	)
+
+	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
+	allBytes := make([]byte, 256)
+	for i := range allBytes {
+		allBytes[i] = byte(i)
+	}
+	allString := string(allBytes)
+	genAll := NewReplacer(
+		allString, "[all]",
+		"\xff", "[ff]",
+		"\x00", "[00]",
+	)
+	testCases = append(testCases,
+		testCase{genAll, allString, "[all]"},
+		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
+		testCase{genAll, "", ""},
+	)
+
+	// Test cases with empty old strings.
+
+	blankToX1 := NewReplacer("", "X")
+	blankToX2 := NewReplacer("", "X", "", "")
+	blankHighPriority := NewReplacer("", "X", "o", "O")
+	blankLowPriority := NewReplacer("o", "O", "", "X")
+	blankNoOp1 := NewReplacer("", "")
+	blankNoOp2 := NewReplacer("", "", "", "A")
+	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
+	testCases = append(testCases,
+		testCase{blankToX1, "foo", "XfXoXoX"},
+		testCase{blankToX1, "", "X"},
+
+		testCase{blankToX2, "foo", "XfXoXoX"},
+		testCase{blankToX2, "", "X"},
+
+		testCase{blankHighPriority, "oo", "XOXOX"},
+		testCase{blankHighPriority, "ii", "XiXiX"},
+		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
+		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
+		testCase{blankHighPriority, "", "X"},
+
+		testCase{blankLowPriority, "oo", "OOX"},
+		testCase{blankLowPriority, "ii", "XiXiX"},
+		testCase{blankLowPriority, "oiio", "OXiXiOX"},
+		testCase{blankLowPriority, "iooi", "XiOOXiX"},
+		testCase{blankLowPriority, "", "X"},
+
+		testCase{blankNoOp1, "foo", "foo"},
+		testCase{blankNoOp1, "", ""},
+
+		testCase{blankNoOp2, "foo", "foo"},
+		testCase{blankNoOp2, "", ""},
+
+		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
+		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
+		testCase{blankFoo, "", "X"},
+	)
+
+	// single string replacer
+
+	abcMatcher := NewReplacer("abc", "[match]")
+
+	testCases = append(testCases,
+		testCase{abcMatcher, "", ""},
+		testCase{abcMatcher, "ab", "ab"},
+		testCase{abcMatcher, "abcd", "[match]d"},
+		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
+	)
+
+	// No-arg test cases.
+
+	nop := NewReplacer()
+	testCases = append(testCases,
+		testCase{nop, "abc", "abc"},
+		testCase{nop, "", ""},
+	)
+
+	// Run the test cases.
+
+	for i, tc := range testCases {
+		if s := tc.r.Replace(tc.in); s != tc.out {
+			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
 		}
 		var buf bytes.Buffer
-		n, err := tt.r.WriteString(&buf, tt.in)
+		n, err := tc.r.WriteString(&buf, tc.in)
 		if err != nil {
 			t.Errorf("%d. WriteString: %v", i, err)
 			continue
 		}
 		got := buf.String()
-		if got != tt.out {
-			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tt.in, got, tt.out)
+		if got != tc.out {
+			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
 			continue
 		}
-		if n != len(tt.out) {
+		if n != len(tc.out) {
 			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
-				i, tt.in, n, len(tt.out), tt.out)
+				i, tc.in, n, len(tc.out), tc.out)
 		}
 	}
 }
 
-// pickAlgorithmTest is a test that verifies that given input for a
-// Replacer that we pick the correct algorithm.
-type pickAlgorithmTest struct {
-	r    *Replacer
-	want string // name of algorithm
+// TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
+func TestPickAlgorithm(t *testing.T) {
+	testCases := []struct {
+		r    *Replacer
+		want string
+	}{
+		{capitalLetters, "*strings.byteReplacer"},
+		{htmlEscaper, "*strings.byteStringReplacer"},
+		{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
+		{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
+		{NewReplacer("", "X"), "*strings.genericReplacer"},
+		{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
+	}
+	for i, tc := range testCases {
+		got := fmt.Sprintf("%T", tc.r.Replacer())
+		if got != tc.want {
+			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
+		}
+	}
 }
 
-var pickAlgorithmTests = []pickAlgorithmTest{
-	{capitalLetters, "*strings.byteReplacer"},
-	{NewReplacer("12", "123"), "*strings.genericReplacer"},
-	{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
-	{htmlEscaper, "*strings.byteStringReplacer"},
-}
+// TestGenericTrieBuilding verifies the structure of the generated trie. Each
+// line of the expected output is one node; a line ending in "+" marks a node
+// that completes a key, and a line ending in "-" marks one that does not.
+func TestGenericTrieBuilding(t *testing.T) {
+	testCases := []struct{ in, out string }{
+		{"abc;abdef;abdefgh;xx;xy;z", `-
+			a-
+			.b-
+			..c+
+			..d-
+			...ef+
+			.....gh+
+			x-
+			.x+
+			.y+
+			z+
+			`},
+		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
+			a-
+			.bra-
+			....c-
+			.....adabra+
+			...........kazam+
+			....h-
+			.....am+
+			....s-
+			.....ion+
+			`},
+		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
+			X+
+			Y+
+			a+
+			.a+
+			..a+
+			i+
+			l-
+			.ong+
+			....er+
+			......st+
+			x+
+			.x+
+			`},
+		{"foo;;foo;foo1", `+
+			f-
+			.oo+
+			...1+
+			`},
+	}
 
-func TestPickAlgorithm(t *testing.T) {
-	for i, tt := range pickAlgorithmTests {
-		got := fmt.Sprintf("%T", tt.r.Replacer())
-		if got != tt.want {
-			t.Errorf("%d. algorithm = %s, want %s", i, got, tt.want)
+	for _, tc := range testCases {
+		keys := Split(tc.in, ";")
+		args := make([]string, len(keys)*2)
+		for i, key := range keys {
+			args[i*2] = key
+		}
+
+		got := NewReplacer(args...).PrintTrie()
+		// Strip the tabs that only indent the raw string literal in tc.out.
+		wantbuf := make([]byte, 0, len(tc.out))
+		for i := 0; i < len(tc.out); i++ {
+			if tc.out[i] != '\t' {
+				wantbuf = append(wantbuf, tc.out[i])
+			}
+		}
+		want := string(wantbuf)
+
+		if got != want {
+			t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
 		}
 	}
 }
 
-func BenchmarkGenericMatch(b *testing.B) {
+func BenchmarkGenericNoMatch(b *testing.B) {
 	str := Repeat("A", 100) + Repeat("B", 100)
 	generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
 	for i := 0; i < b.N; i++ {
@@ -113,6 +398,42 @@ func BenchmarkGenericMatch(b *testing.B) {
 	}
 }
 
+func BenchmarkGenericMatch1(b *testing.B) {
+	str := Repeat("a", 100) + Repeat("b", 100)
+	generic := NewReplacer("a", "A", "b", "B", "12", "123")
+	for i := 0; i < b.N; i++ {
+		generic.Replace(str)
+	}
+}
+
+func BenchmarkGenericMatch2(b *testing.B) {
+	str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
+	for i := 0; i < b.N; i++ {
+		htmlUnescaper.Replace(str)
+	}
+}
+
+func benchmarkSingleString(b *testing.B, pattern, text string) {
+	r := NewReplacer(pattern, "[match]")
+	b.SetBytes(int64(len(text)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		r.Replace(text)
+	}
+}
+
+func BenchmarkSingleMaxSkipping(b *testing.B) {
+	benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
+}
+
+func BenchmarkSingleLongSuffixFail(b *testing.B) {
+	benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
+}
+
+func BenchmarkSingleMatch(b *testing.B) {
+	benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
+}
+
 func BenchmarkByteByteNoMatch(b *testing.B) {
 	str := Repeat("A", 100) + Repeat("B", 100)
 	for i := 0; i < b.N; i++ {
@@ -144,7 +465,7 @@ func BenchmarkHTMLEscapeNew(b *testing.B) {
 func BenchmarkHTMLEscapeOld(b *testing.B) {
 	str := "I <3 to escape HTML & other text too."
 	for i := 0; i < b.N; i++ {
-		oldhtmlEscape(str)
+		oldHTMLEscape(str)
 	}
 }