summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Waylan Limberg <waylan@gmail.com> 2011-12-14 21:59:21 -0500
committer Waylan Limberg <waylan@gmail.com> 2011-12-14 21:59:21 -0500
commit 790aeb56053940f299d600d8d364fc3fc8151085 (patch)
tree f9c84fdafccf4ada2bdfdfb26cb178ac3b0bf072
parent 2e3830b9bf3a0829db7a5d04f731b5019d28cc65 (diff)
download python-markdown-slowhtml.tar.gz
Reverted rawhtmlblock preprocessor to the pre-attr state as it was too slow. [slowhtml]
-rw-r--r-- markdown/preprocessors.py 133
1 files changed, 19 insertions, 114 deletions
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index b59f057..f4a33b3 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -44,71 +44,20 @@ class HtmlBlockPreprocessor(Preprocessor):
"""Remove html blocks from the text and store them for later retrieval."""
right_tag_patterns = ["</%s>", "%s>"]
- attrs_pattern = r"""
- \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q) # attr="value"
- | # OR
- \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+) # attr=value
- | # OR
- \s+(?P<attr2>[^>"'/= ]+) # attr
- """
- left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % attrs_pattern
- attrs_re = re.compile(attrs_pattern, re.VERBOSE)
- left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
- markdown_in_raw = False
def _get_left_tag(self, block):
- m = self.left_tag_re.match(block)
- if m:
- tag = m.group('tag')
- raw_attrs = m.group('attrs')
- attrs = {}
- if raw_attrs:
- for ma in self.attrs_re.finditer(raw_attrs):
- if ma.group('attr'):
- if ma.group('value'):
- attrs[ma.group('attr').strip()] = ma.group('value')
- else:
- attrs[ma.group('attr').strip()] = ""
- elif ma.group('attr1'):
- if ma.group('value1'):
- attrs[ma.group('attr1').strip()] = ma.group('value1')
- else:
- attrs[ma.group('attr1').strip()] = ""
- elif ma.group('attr2'):
- attrs[ma.group('attr2').strip()] = ""
- return tag, len(m.group(0)), attrs
- else:
- tag = block[1:].replace(">", " ", 1).split()[0].lower()
- return tag, len(tag)+2, {}
-
- def _recursive_tagfind(self, ltag, rtag, start_index, block):
- while 1:
- i = block.find(rtag, start_index)
- if i == -1:
- return -1
- j = block.find(ltag, start_index)
- # if no ltag, or rtag found before another ltag, return index
- if (j > i or j == -1):
- return i + len(rtag)
- # another ltag found before rtag, use end of ltag as starting
- # point and search again
- j = block.find('>', j)
- start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
- if start_index == -1:
- # HTML potentially malformed- ltag has no corresponding
- # rtag
- return -1
-
- def _get_right_tag(self, left_tag, left_index, block):
+ return block[1:].replace(">", " ", 1).split()[0].lower()
+
+ def _get_right_tag(self, left_tag, block):
for p in self.right_tag_patterns:
tag = p % left_tag
- i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block)
+ i = block.rfind(tag)
if i > 2:
- return tag.lstrip("<").rstrip(">"), i
- return block.rstrip()[-left_index:-1].lower(), len(block)
-
+ return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)
+ return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)
+
def _equal_tags(self, left_tag, right_tag):
- if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
+ if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
return True
if ("/" + left_tag) == right_tag:
return True
@@ -142,18 +91,14 @@ class HtmlBlockPreprocessor(Preprocessor):
block = block[1:]
if not in_tag:
- if block.startswith("<") and len(block.strip()) > 1:
- left_tag, left_index, attrs = self._get_left_tag(block)
- right_tag, data_index = self._get_right_tag(left_tag,
- left_index,
- block)
+ if block.startswith("<"):
+ left_tag = self._get_left_tag(block)
+ right_tag, data_index = self._get_right_tag(left_tag, block)
if block[1] == "!":
# is a comment block
left_tag = "--"
- right_tag, data_index = self._get_right_tag(left_tag,
- left_index,
- block)
+ right_tag, data_index = self._get_right_tag(left_tag, block)
# keep checking conditions below and maybe just append
if data_index < len(block) \
@@ -172,21 +117,10 @@ class HtmlBlockPreprocessor(Preprocessor):
if block.rstrip().endswith(">") \
and self._equal_tags(left_tag, right_tag):
- if self.markdown_in_raw and 'markdown' in attrs.keys():
- start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
- '', block[:left_index])
- end = block[-len(right_tag)-2:]
- block = block[left_index:-len(right_tag)-2]
- new_blocks.append(
- self.markdown.htmlStash.store(start))
- new_blocks.append(block)
- new_blocks.append(
- self.markdown.htmlStash.store(end))
- else:
- new_blocks.append(
- self.markdown.htmlStash.store(block.strip()))
+ new_blocks.append(
+ self.markdown.htmlStash.store(block.strip()))
continue
- else:
+ else: #if not block[1] == "!":
# if is block level tag and is not complete
if util.isBlockLevel(left_tag) or left_tag == "--" \
@@ -204,46 +138,17 @@ class HtmlBlockPreprocessor(Preprocessor):
else:
items.append(block)
- right_tag, data_index = self._get_right_tag(left_tag,
- left_index,
- block)
+ right_tag, data_index = self._get_right_tag(left_tag, block)
if self._equal_tags(left_tag, right_tag):
# if find closing tag
in_tag = False
- if self.markdown_in_raw and 'markdown' in attrs.keys():
- start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
- '', items[0][:left_index])
- items[0] = items[0][left_index:]
- end = items[-1][-len(right_tag)-2:]
- items[-1] = items[-1][:-len(right_tag)-2]
- new_blocks.append(
- self.markdown.htmlStash.store(start))
- new_blocks.extend(items)
- new_blocks.append(
- self.markdown.htmlStash.store(end))
- else:
- new_blocks.append(
- self.markdown.htmlStash.store('\n\n'.join(items)))
+ new_blocks.append(
+ self.markdown.htmlStash.store('\n\n'.join(items)))
items = []
if items:
- if self.markdown_in_raw and 'markdown' in attrs.keys():
- start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?',
- '', items[0][:left_index])
- items[0] = items[0][left_index:]
- end = items[-1][-len(right_tag)-2:]
- items[-1] = items[-1][:-len(right_tag)-2]
- new_blocks.append(
- self.markdown.htmlStash.store(start))
- new_blocks.extend(items)
- if end.strip():
- new_blocks.append(
- self.markdown.htmlStash.store(end))
- else:
- new_blocks.append(
- self.markdown.htmlStash.store('\n\n'.join(items)))
- #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
+ new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
new_blocks.append('\n')
new_text = "\n\n".join(new_blocks)