libgo: Update to weekly.2011-12-02.

From-SVN: r182295
author: Ian Lance Taylor <ian@gcc.gnu.org> 2011-12-13 19:16:27 +0000
committer: Ian Lance Taylor <ian@gcc.gnu.org> 2011-12-13 19:16:27 +0000
commit: 7b1c3dd9e670da2041ff1af415999310f88888ad (patch)
tree: c5132538d5da85ed816c7e1f9d93c4a503b838ab /libgo/go/html/parse.go
parent: 36cfbee133027429a681ce585643d38228ab1213 (diff)
download: gcc-7b1c3dd9e670da2041ff1af415999310f88888ad.tar.gz
1 files changed, 144 insertions, 25 deletions
diff --git a/libgo/go/html/parse.go b/libgo/go/html/parse.go
index 9b7e934ac34..97fbc514d82 100644
--- a/libgo/go/html/parse.go
+++ b/libgo/go/html/parse.go
@@ -37,6 +37,11 @@ type parser struct {
 	// fosterParenting is whether new elements should be inserted according to
 	// the foster parenting rules (section 11.2.5.3).
 	fosterParenting bool
+	// quirks is whether the parser is operating in "quirks mode."
+	quirks bool
+	// context is the context element when parsing an HTML fragment
+	// (section 11.4).
+	context *Node
 }
 
 func (p *parser) top() *Node {
@@ -285,9 +290,10 @@ func (p *parser) setOriginalIM() {
 func (p *parser) resetInsertionMode() {
 	for i := len(p.oe) - 1; i >= 0; i-- {
 		n := p.oe[i]
-		if i == 0 {
-			// TODO: set n to the context element, for HTML fragment parsing.
+		if i == 0 && p.context != nil {
+			n = p.context
 		}
+
 		switch n.Data {
 		case "select":
 			p.im = inSelectIM
@@ -319,9 +325,17 @@ func (p *parser) resetInsertionMode() {
 	p.im = inBodyIM
 }
 
+const whitespace = " \t\r\n\f"
+
 // Section 11.2.5.4.1.
 func initialIM(p *parser) bool {
 	switch p.tok.Type {
+	case TextToken:
+		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+		if len(p.tok.Data) == 0 {
+			// It was all whitespace, so ignore it.
+			return true
+		}
 	case CommentToken:
 		p.doc.Add(&Node{
 			Type: CommentNode,
@@ -329,15 +343,13 @@ func initialIM(p *parser) bool {
 		})
 		return true
 	case DoctypeToken:
-		p.doc.Add(&Node{
-			Type: DoctypeNode,
-			Data: p.tok.Data,
-		})
+		n, quirks := parseDoctype(p.tok.Data)
+		p.doc.Add(n)
+		p.quirks = quirks
 		p.im = beforeHTMLIM
 		return true
 	}
-	// TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper,
-	// and so switching on "quirks mode" might belong in a different package.
+	p.quirks = true
 	p.im = beforeHTMLIM
 	return false
 }
@@ -345,6 +357,12 @@ func initialIM(p *parser) bool {
 // Section 11.2.5.4.2.
 func beforeHTMLIM(p *parser) bool {
 	switch p.tok.Type {
+	case TextToken:
+		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+		if len(p.tok.Data) == 0 {
+			// It was all whitespace, so ignore it.
+			return true
+		}
 	case StartTagToken:
 		if p.tok.Data == "html" {
 			p.addElement(p.tok.Data, p.tok.Attr)
@@ -383,7 +401,11 @@ func beforeHeadIM(p *parser) bool {
 	case ErrorToken:
 		implied = true
 	case TextToken:
-		// TODO: distinguish whitespace text from others.
+		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+		if len(p.tok.Data) == 0 {
+			// It was all whitespace, so ignore it.
+			return true
+		}
 		implied = true
 	case StartTagToken:
 		switch p.tok.Data {
@@ -417,8 +439,6 @@ func beforeHeadIM(p *parser) bool {
 	return !implied
 }
 
-const whitespace = " \t\r\n\f"
-
 // Section 11.2.5.4.4.
 func inHeadIM(p *parser) bool {
 	var (
@@ -441,6 +461,8 @@ func inHeadIM(p *parser) bool {
 		implied = true
 	case StartTagToken:
 		switch p.tok.Data {
+		case "html":
+			return inBodyIM(p)
 		case "base", "basefont", "bgsound", "command", "link", "meta":
 			p.addElement(p.tok.Data, p.tok.Attr)
 			p.oe.pop()
@@ -450,6 +472,9 @@ func inHeadIM(p *parser) bool {
 			p.setOriginalIM()
 			p.im = textIM
 			return true
+		case "head":
+			// Ignore the token.
+			return true
 		default:
 			implied = true
 		}
@@ -560,11 +585,30 @@ func copyAttributes(dst *Node, src Token) {
 func inBodyIM(p *parser) bool {
 	switch p.tok.Type {
 	case TextToken:
+		switch n := p.oe.top(); n.Data {
+		case "pre", "listing", "textarea":
+			if len(n.Child) == 0 {
+				// Ignore a newline at the start of a <pre> block.
+				d := p.tok.Data
+				if d != "" && d[0] == '\r' {
+					d = d[1:]
+				}
+				if d != "" && d[0] == '\n' {
+					d = d[1:]
+				}
+				if d == "" {
+					return true
+				}
+				p.tok.Data = d
+			}
+		}
 		p.reconstructActiveFormattingElements()
 		p.addText(p.tok.Data)
 		p.framesetOK = false
 	case StartTagToken:
 		switch p.tok.Data {
+		case "html":
+			copyAttributes(p.oe[0], p.tok)
 		case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul":
 			p.popUntil(buttonScopeStopTags, "p")
 			p.addElement(p.tok.Data, p.tok.Attr)
@@ -589,6 +633,13 @@ func inBodyIM(p *parser) bool {
 		case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u":
 			p.reconstructActiveFormattingElements()
 			p.addFormattingElement(p.tok.Data, p.tok.Attr)
+		case "nobr":
+			p.reconstructActiveFormattingElements()
+			if p.elementInScope(defaultScopeStopTags, "nobr") {
+				p.inBodyEndTagFormatting("nobr")
+				p.reconstructActiveFormattingElements()
+			}
+			p.addFormattingElement(p.tok.Data, p.tok.Attr)
 		case "applet", "marquee", "object":
 			p.reconstructActiveFormattingElements()
 			p.addElement(p.tok.Data, p.tok.Attr)
@@ -601,7 +652,9 @@ func inBodyIM(p *parser) bool {
 			p.acknowledgeSelfClosingTag()
 			p.framesetOK = false
 		case "table":
-			p.popUntil(buttonScopeStopTags, "p") // TODO: skip this step in quirks mode.
+			if !p.quirks {
+				p.popUntil(buttonScopeStopTags, "p")
+			}
 			p.addElement(p.tok.Data, p.tok.Attr)
 			p.framesetOK = false
 			p.im = inTableIM
@@ -721,6 +774,11 @@ func inBodyIM(p *parser) bool {
 			p.oe.pop()
 			p.oe.pop()
 			p.form = nil
+		case "xmp":
+			p.popUntil(buttonScopeStopTags, "p")
+			p.reconstructActiveFormattingElements()
+			p.framesetOK = false
+			p.addElement(p.tok.Data, p.tok.Attr)
 		case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
 			// Ignore the token.
 		default:
@@ -1462,18 +1520,7 @@ func afterAfterFramesetIM(p *parser) bool {
 	return true
 }
 
-// Parse returns the parse tree for the HTML from the given Reader.
-// The input is assumed to be UTF-8 encoded.
-func Parse(r io.Reader) (*Node, error) {
-	p := &parser{
-		tokenizer: NewTokenizer(r),
-		doc: &Node{
-			Type: DocumentNode,
-		},
-		scripting:  true,
-		framesetOK: true,
-		im:         initialIM,
-	}
+func (p *parser) parse() error {
 	// Iterate until EOF. Any other error will cause an early return.
 	consumed := true
 	for {
@@ -1482,7 +1529,7 @@ func Parse(r io.Reader) (*Node, error) {
 				if err == io.EOF {
 					break
 				}
-				return nil, err
+				return err
 			}
 		}
 		consumed = p.im(p)
@@ -1493,5 +1540,77 @@ func Parse(r io.Reader) (*Node, error) {
 			break
 		}
 	}
+	return nil
+}
+
+// Parse returns the parse tree for the HTML from the given Reader.
+// The input is assumed to be UTF-8 encoded.
+func Parse(r io.Reader) (*Node, error) {
+	p := &parser{
+		tokenizer: NewTokenizer(r),
+		doc: &Node{
+			Type: DocumentNode,
+		},
+		scripting:  true,
+		framesetOK: true,
+		im:         initialIM,
+	}
+	err := p.parse()
+	if err != nil {
+		return nil, err
+	}
 	return p.doc, nil
 }
+
+// ParseFragment parses a fragment of HTML and returns the nodes that were 
+// found. If the fragment is the InnerHTML for an existing element, pass that
+// element in context.
+func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+	p := &parser{
+		tokenizer: NewTokenizer(r),
+		doc: &Node{
+			Type: DocumentNode,
+		},
+		scripting: true,
+		context:   context,
+	}
+
+	if context != nil {
+		switch context.Data {
+		case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
+			p.tokenizer.rawTag = context.Data
+		}
+	}
+
+	root := &Node{
+		Type: ElementNode,
+		Data: "html",
+	}
+	p.doc.Add(root)
+	p.oe = nodeStack{root}
+	p.resetInsertionMode()
+
+	for n := context; n != nil; n = n.Parent {
+		if n.Type == ElementNode && n.Data == "form" {
+			p.form = n
+			break
+		}
+	}
+
+	err := p.parse()
+	if err != nil {
+		return nil, err
+	}
+
+	parent := p.doc
+	if context != nil {
+		parent = root
+	}
+
+	result := parent.Child
+	parent.Child = nil
+	for _, n := range result {
+		n.Parent = nil
+	}
+	return result, nil
+}
author	Ian Lance Taylor <ian@gcc.gnu.org>	2011-12-13 19:16:27 +0000
committer	Ian Lance Taylor <ian@gcc.gnu.org>	2011-12-13 19:16:27 +0000
commit	7b1c3dd9e670da2041ff1af415999310f88888ad (patch)
tree	c5132538d5da85ed816c7e1f9d93c4a503b838ab /libgo/go/html/parse.go
parent	36cfbee133027429a681ce585643d38228ab1213 (diff)
download	gcc-7b1c3dd9e670da2041ff1af415999310f88888ad.tar.gz