markdown_extensions/toc.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

"""
Table of Contents Extension for Python-Markdown
* * *

(c) 2008 [Jack Miller](http://codezen.org)

Dependencies:
* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)

"""
import markdown
from markdown import etree
import re

class TocPostprocessor (markdown.Postprocessor):
    # Iterator wrapper to get parent and child all at once
    def iterparent(self, root):
        for parent in root.getiterator():
            for child in parent:
                yield parent, child

    def run(self, doc) :
        div = etree.Element("div")
        div.attrib["class"] = "toc"
        last_li = None

        # Add title to the div
        if self.config["title"][0]:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = self.config["title"][0]

        level = 0
        list_stack=[div]
        header_rgx = re.compile("[Hh][123456]")

        for (p, c) in self.iterparent(doc):
            if not c.text:
                continue

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.

            if c.text.find(self.config["marker"][0]) > -1:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
                    
            if header_rgx.match(c.tag):
                tag_level = int(c.tag[-1])
                
                # Regardless of how many levels we jumped
                # only one list should be created, since
                # empty lists containing lists are illegal.
    
                if tag_level < level:
                    list_stack.pop()
                    level = tag_level

                if tag_level > level:
                    newlist = etree.Element("ul")
                    if last_li:
                        last_li.append(newlist)
                    else:
                        list_stack[-1].append(newlist)
                    list_stack.append(newlist)
                    level = tag_level

                # Do not override pre-existing ids 
                if c.attrib.has_key("id"):
                    id = c.attrib["id"]
                else:
                    id = self.config["slugify"][0](c.text)

                # List item link, to be inserted into the toc div
                last_li = etree.Element("li")
                link = etree.SubElement(last_li, "a")
                link.text = c.text
                link.attrib["href"] = '#' + id

                if int(self.config["anchorlink"][0]):
                    anchor = etree.SubElement(c, "a")
                    anchor.text = c.text
                    anchor.attrib["id"] = id
                    anchor.attrib["href"] = "#" + id
                    anchor.attrib["class"] = "toclink"
                    c.text = ""
                else:
                    c.attrib["id"] = id

                list_stack[-1].append(last_li)

class TocExtension (markdown.Extension):
    def __init__(self, configs):
        self.config = { "marker" : ["[TOC]", 
                            "Text to find and replace with Table of Contents -"
                            "Defaults to \"[TOC]\""],
                        "slugify" : [self.slugify,
                            "Function to generate anchors based on header text-"
                            "Defaults to a built in slugify function."],
                        "title" : [None,
                            "Title to insert into TOC <div> - "
                            "Defaults to None"],
                        "anchorlink" : [0,
                            "1 if header should be a self link"
                            "Defaults to 0"]}

        for key, value in configs:
            self.setConfig(key, value)

    # This is exactly the same as Django's slugify
    def slugify(self, value):
        """ Slugify a string, to make it URL friendly. """
        import unicodedata
        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
        value = unicode(re.sub('[^\w\s-]', '', value).strip().lower())
        return re.sub('[-\s]+','-',value)

    def extendMarkdown(self, md, md_globals) :
        tocext = TocPostprocessor(md)
        tocext.config = self.config
        md.postprocessors.add("toc", tocext, "_begin")
	
def makeExtension(configs={}) :
    return TocExtension(configs=configs)