diff options
Diffstat (limited to 'libgo/go/html/token_test.go')
-rw-r--r-- | libgo/go/html/token_test.go | 285 |
1 files changed, 258 insertions, 27 deletions
diff --git a/libgo/go/html/token_test.go b/libgo/go/html/token_test.go index 0a0beb201b3..310cd97d670 100644 --- a/libgo/go/html/token_test.go +++ b/libgo/go/html/token_test.go @@ -21,6 +21,11 @@ type tokenTest struct { } var tokenTests = []tokenTest{ + { + "empty", + "", + "", + }, // A single text node. The tokenizer should not break text nodes on whitespace, // nor should it normalize whitespace within a text node. { @@ -41,6 +46,88 @@ var tokenTests = []tokenTest{ "<a>b<c/>d</e>", "<a>$b$<c/>$d$</e>", }, + // Angle brackets that aren't a tag. + { + "not a tag #0", + "<", + "<", + }, + { + "not a tag #1", + "</", + "</", + }, + { + "not a tag #2", + "</>", + "", + }, + { + "not a tag #3", + "a</>b", + "a$b", + }, + { + "not a tag #4", + "</ >", + "<!-- -->", + }, + { + "not a tag #5", + "</.", + "<!--.-->", + }, + { + "not a tag #6", + "</.>", + "<!--.-->", + }, + { + "not a tag #7", + "a < b", + "a < b", + }, + { + "not a tag #8", + "<.>", + "<.>", + }, + { + "not a tag #9", + "a<<<b>>>c", + "a<<$<b>$>>c", + }, + { + "not a tag #10", + "if x<0 and y < 0 then x*y>0", + "if x<0 and y < 0 then x*y>0", + }, + // EOF in a tag name. + { + "tag name eof #0", + "<a", + "", + }, + { + "tag name eof #1", + "<a ", + "", + }, + { + "tag name eof #2", + "a<b", + "a", + }, + { + "tag name eof #3", + "<a><b", + "<a>", + }, + { + "tag name eof #4", + `<a x`, + `<a x="">`, + }, // Some malformed tags that are missing a '>'. { "malformed tag #0", @@ -54,70 +141,198 @@ var tokenTests = []tokenTest{ }, { "malformed tag #2", + `<p id`, + `<p id="">`, + }, + { + "malformed tag #3", + `<p id=`, + `<p id="">`, + }, + { + "malformed tag #4", + `<p id=>`, + `<p id="">`, + }, + { + "malformed tag #5", + `<p id=0`, + `<p id="0">`, + }, + { + "malformed tag #6", `<p id=0</p>`, `<p id="0</p">`, }, { - "malformed tag #3", + "malformed tag #7", `<p id="0</p>`, `<p id="0</p>">`, }, { - "malformed tag #4", + "malformed tag #8", `<p id="0"</p>`, `<p id="0" <="" p="">`, }, + // Raw text and RCDATA. + { + "basic raw text", + "<script><a></b></script>", + "<script>$<a></b>$</script>", + }, + { + "unfinished script end tag", + "<SCRIPT>a</SCR", + "<script>$a</SCR", + }, + { + "broken script end tag", + "<SCRIPT>a</SCR ipt>", + "<script>$a</SCR ipt>", + }, + { + "EOF in script end tag", + "<SCRIPT>a</SCRipt", + "<script>$a</SCRipt", + }, + { + "scriptx end tag", + "<SCRIPT>a</SCRiptx", + "<script>$a</SCRiptx", + }, + { + "' ' completes script end tag", + "<SCRIPT>a</SCRipt ", + "<script>$a$</script>", + }, + { + "'>' completes script end tag", + "<SCRIPT>a</SCRipt>", + "<script>$a$</script>", + }, + { + "self-closing script end tag", + "<SCRIPT>a</SCRipt/>", + "<script>$a$</script>", + }, + { + "nested script tag", + "<SCRIPT>a</SCRipt<script>", + "<script>$a</SCRipt<script>", + }, + { + "script end tag after unfinished", + "<SCRIPT>a</SCRipt</script>", + "<script>$a</SCRipt$</script>", + }, + { + "script/style mismatched tags", + "<script>a</style>", + "<script>$a</style>", + }, + { + "style element with entity", + "<style>'", + "<style>$&apos;", + }, + { + "textarea with tag", + "<textarea><div></textarea>", + "<textarea>$<div>$</textarea>", + }, + { + "title with tag and entity", + "<title><b>K&R C</b></title>", + "<title>$<b>K&R C</b>$</title>", + }, + // DOCTYPE tests. + { + "Proper DOCTYPE", + "<!DOCTYPE html>", + "<!DOCTYPE html>", + }, + { + "DOCTYPE with no space", + "<!doctypehtml>", + "<!DOCTYPE html>", + }, + { + "DOCTYPE with two spaces", + "<!doctype html>", + "<!DOCTYPE html>", + }, + { + "looks like DOCTYPE but isn't", + "<!DOCUMENT html>", + "<!--DOCUMENT html-->", + }, + { + "DOCTYPE at EOF", + "<!DOCtype", + "<!DOCTYPE >", + }, + // XML processing instructions. + { + "XML processing instruction", + "<?xml?>", + "<!--?xml?-->", + }, // Comments. { "comment0", "abc<b><!-- skipme --></b>def", - "abc$<b>$</b>$def", + "abc$<b>$<!-- skipme -->$</b>$def", }, { "comment1", "a<!-->z", - "a$z", + "a$<!---->$z", }, { "comment2", "a<!--->z", - "a$z", + "a$<!---->$z", }, { "comment3", "a<!--x>-->z", - "a$z", + "a$<!--x>-->$z", }, { "comment4", "a<!--x->-->z", - "a$z", + "a$<!--x->-->$z", }, { "comment5", "a<!>z", - "a$<!>z", + "a$<!---->$z", }, { "comment6", "a<!->z", - "a$<!->z", + "a$<!----->$z", }, { "comment7", "a<!---<>z", - "a$<!---<>z", + "a$<!---<>z-->", }, { "comment8", "a<!--z", - "a$<!--z", + "a$<!--z-->", + }, + { + "comment9", + "a<!--x--!>z", + "a$<!--x-->$z", }, // An attribute with a backslash. { "backslash", `<p id="a\"b">`, - `<p id="a"b">`, + `<p id="a\" b"="">`, }, // Entities, tag name and attribute key lower-casing, and whitespace // normalization within a tag. @@ -133,11 +348,14 @@ var tokenTests = []tokenTest{ `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`, `<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`, }, - { - "entity without semicolon", - `¬it;∉<a b="q=z&=5¬ice=hello¬=world">`, - `¬it;∉$<a b="q=z&amp=5&notice=hello¬=world">`, - }, + /* + // TODO: re-enable this test when it works. This input/output matches html5lib's behavior. + { + "entity without semicolon", + `¬it;∉<a b="q=z&=5¬ice=hello¬=world">`, + `¬it;∉$<a b="q=z&amp=5&notice=hello¬=world">`, + }, + */ { "entity with digits", "½", @@ -190,21 +408,34 @@ var tokenTests = []tokenTest{ `<meta http-equiv="content-type">`, `<meta http-equiv="content-type">`, }, + { + "Mixed attributes", + `a<P V="0 1" w='2' X=3 y>z`, + `a$<p v="0 1" w="2" x="3" y="">$z`, + }, + { + "Attributes with a solitary single quote", + `<p id=can't><p id=won't>`, + `<p id="can't">$<p id="won't">`, + }, } func TestTokenizer(t *testing.T) { loop: for _, tt := range tokenTests { - z := NewTokenizer(bytes.NewBuffer([]byte(tt.html))) - for i, s := range strings.Split(tt.golden, "$") { - if z.Next() == ErrorToken { - t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error()) - continue loop - } - actual := z.Token().String() - if s != actual { - t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual) - continue loop + z := NewTokenizer(strings.NewReader(tt.html)) + z.ReturnComments = true + if tt.golden != "" { + for i, s := range strings.Split(tt.golden, "$") { + if z.Next() == ErrorToken { + t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error()) + continue loop + } + actual := z.Token().String() + if s != actual { + t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual) + continue loop + } } } z.Next() |