summary | refs | log | tree | commit | diff
path: root/src/zope/tal/htmltalparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/zope/tal/htmltalparser.py')
-rw-r--r-- src/zope/tal/htmltalparser.py 56
1 files changed, 38 insertions, 18 deletions
diff --git a/src/zope/tal/htmltalparser.py b/src/zope/tal/htmltalparser.py
index c79bbea..1761bc7 100644
--- a/src/zope/tal/htmltalparser.py
+++ b/src/zope/tal/htmltalparser.py
@@ -11,7 +11,9 @@
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
-"""Parse HTML and compile to TALInterpreter intermediate code.
+"""
+Parse HTML and compile to :class:`~.TALInterpreter` intermediate code, using
+a :class:`~.TALGenerator`.
"""
# When Python 3 becomes mainstream please swap the try and except parts.
@@ -28,6 +30,7 @@ except ImportError:
# so here's a copy taken from Python 3.4:
class HTMLParseError(Exception):
def __init__(self, msg, position=(None, None)):
+ Exception.__init__(self)
assert msg
self.msg = msg
self.lineno = position[0]
@@ -50,30 +53,30 @@ _html_parser_extras = {}
if 'convert_charrefs' in HTMLParser.__init__.__code__.co_names:
_html_parser_extras['convert_charrefs'] = False # pragma: NO COVER py34
-
+#: List of Boolean attributes in HTML that may be given in
+#: minimized form (e.g. ``<img ismap>`` rather than ``<img ismap="">``)
+#: From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
BOOLEAN_HTML_ATTRS = frozenset([
- # List of Boolean attributes in HTML that may be given in
- # minimized form (e.g. <img ismap> rather than <img ismap="">)
- # From http://www.w3.org/TR/xhtml1/#guidelines (C.10)
"compact", "nowrap", "ismap", "declare", "noshade", "checked",
"disabled", "readonly", "multiple", "selected", "noresize",
"defer"
])
+#: List of HTML tags with an empty content model; these are
+#: rendered in minimized form, e.g. ``<img />``.
+#: From http://www.w3.org/TR/xhtml1/#dtds
EMPTY_HTML_TAGS = frozenset([
- # List of HTML tags with an empty content model; these are
- # rendered in minimized form, e.g. <img />.
- # From http://www.w3.org/TR/xhtml1/#dtds
"base", "meta", "link", "hr", "br", "param", "img", "area",
"input", "col", "basefont", "isindex", "frame",
])
+#: List of HTML elements that close open paragraph-level elements
+#: and are themselves paragraph-level.
PARA_LEVEL_HTML_TAGS = frozenset([
- # List of HTML elements that close open paragraph-level elements
- # and are themselves paragraph-level.
"h1", "h2", "h3", "h4", "h5", "h6", "p",
])
+#: Tags that automatically close other tags.
BLOCK_CLOSING_TAG_MAP = {
"tr": frozenset(["tr", "td", "th"]),
"td": frozenset(["td", "th"]),
@@ -83,12 +86,13 @@ BLOCK_CLOSING_TAG_MAP = {
"dt": frozenset(["dd", "dt"]),
}
+#: List of HTML tags that denote larger sections than paragraphs.
BLOCK_LEVEL_HTML_TAGS = frozenset([
- # List of HTML tags that denote larger sections than paragraphs.
"blockquote", "table", "tr", "th", "td", "thead", "tfoot", "tbody",
"noframe", "ul", "ol", "li", "dl", "dt", "dd", "div",
])
+#: Section level HTML tags
SECTION_LEVEL_HTML_TAGS = PARA_LEVEL_HTML_TAGS.union(BLOCK_LEVEL_HTML_TAGS)
TIGHTEN_IMPLICIT_CLOSE_TAGS = PARA_LEVEL_HTML_TAGS.union(BLOCK_CLOSING_TAG_MAP)
@@ -127,25 +131,37 @@ class OpenTagError(NestingError):
HTMLParseError.__init__(self, msg, position)
class HTMLTALParser(HTMLParser):
+ """
+ Parser for HTML.
+
+ After you call either :meth:`parseFile` or :meth:`parseString`
+ you can retrieve the compiled program using :meth:`getCode`.
+ """
# External API
def __init__(self, gen=None):
+ """
+ :keyword TALGenerator gen: The configured (with an expression compiler)
+ code generator to use. If one is not given, a default will be used.
+ """
HTMLParser.__init__(self, **_html_parser_extras)
if gen is None:
gen = TALGenerator(xml=0)
self.gen = gen
self.tagstack = []
self.nsstack = []
- self.nsdict = {'tal': ZOPE_TAL_NS,
- 'metal': ZOPE_METAL_NS,
- 'i18n': ZOPE_I18N_NS,
- }
+ self.nsdict = {
+ 'tal': ZOPE_TAL_NS,
+ 'metal': ZOPE_METAL_NS,
+ 'i18n': ZOPE_I18N_NS,
+ }
def parseFile(self, file):
- f = open(file)
- data = f.read()
- f.close()
+ """Parse data in the given file."""
+ with open(file) as f:
+ data = f.read()
+
try:
self.parseString(data)
except TALError as e:
@@ -153,6 +169,7 @@ class HTMLTALParser(HTMLParser):
raise
def parseString(self, data):
+ """Parse data in the given string."""
self.feed(data)
self.close()
while self.tagstack:
@@ -160,6 +177,9 @@ class HTMLTALParser(HTMLParser):
assert self.nsstack == [], self.nsstack
def getCode(self):
+ """
+ After parsing, this returns ``(program, macros)``.
+ """
return self.gen.getCode()
# Overriding HTMLParser methods