| e4396d6 by Yuri Takhteyev at 2008-10-17 |
1 |
""" |
|
2 |
Table of Contents Extension for Python-Markdown |
|
3 |
* * * |
|
4 |
|
|
5 |
(c) 2008 [Jack Miller](http://codezen.org) |
|
6 |
|
|
7 |
Dependencies: |
|
8 |
* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) |
|
9 |
|
|
10 |
""" |
|
11 |
import markdown |
| 2eb00c8 by Toshio Kuratomi at 2010-07-06 |
12 |
from markdown.util import etree |
| e4396d6 by Yuri Takhteyev at 2008-10-17 |
13 |
import re |
|
14 |
|
| d624305 by Waylan Limberg at 2008-11-21 |
15 |
class TocTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """Build a table of contents from the headers of a document tree.

    Settings are read from ``self.config``, a mapping whose values are lists
    holding the setting itself at index 0.  The keys used here are
    ``"title"``, ``"marker"``, ``"slugify"`` and ``"anchorlink"``.
    """

    def _walk(self, root):
        """Iterate over ``root`` and all of its descendants.

        ``Element.getiterator()`` was removed from the standard library's
        ElementTree in Python 3.9, so ``iter()`` is used when the element
        provides it; older ElementTree versions fall back to
        ``getiterator()``.
        """
        walker = getattr(root, "iter", None)
        if walker is None:
            walker = root.getiterator
        return walker()

    # Iterator wrapper to get parent and child all at once
    def iterparent(self, root):
        """Yield a ``(parent, child)`` pair for every element below ``root``."""
        for parent in self._walk(root):
            for child in parent:
                yield parent, child

    def _unique_id(self, text, used_ids):
        """Slugify ``text`` into an id not yet in ``used_ids`` and record it.

        When the plain slug is taken, ``_1``, ``_2``, ... is appended until a
        free id is found.
        """
        slug = self.config["slugify"][0](text)
        if slug in used_ids:
            ctr = 1
            while "%s_%d" % (slug, ctr) in used_ids:
                ctr += 1
            slug = "%s_%d" % (slug, ctr)
        used_ids.add(slug)
        return slug

    def run(self, doc):
        """Insert the table of contents into ``doc`` and give headers ids.

        Every non-header element whose text contains the configured marker
        is replaced by the TOC ``<div>``.  Every header (h1-h6) that has text
        gets an ``id`` attribute (pre-existing ids are kept) and an entry in
        the TOC; when ``anchorlink`` is set, the header text is additionally
        moved into a self-referencing ``<a class="toclink">``.  The tree is
        modified in place and nothing is returned.
        """
        div = etree.Element("div")
        div.attrib["class"] = "toc"
        last_li = None

        # Add title to the div
        title = self.config["title"][0]
        if title:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
            header.text = title

        marker = self.config["marker"][0]
        level = 0
        list_stack = [div]
        header_rgx = re.compile("[Hh][123456]")

        # Collect the ids already present so generated ones never collide.
        # A set keeps the membership tests below cheap.
        used_ids = set()
        for c in self._walk(doc):
            if "id" in c.attrib:
                used_ids.add(c.attrib["id"])

        # Take a snapshot of the (parent, child) pairs before changing the
        # tree: the elements generated below (the TOC div, its links, the
        # header anchors) must never be scanned themselves, otherwise a TOC
        # link whose text contains the marker would be replaced by the div
        # it lives in.
        for (p, c) in list(self.iterparent(doc)):
            if not c.text:
                continue

            is_header = header_rgx.match(c.tag)

            # To keep the output from screwing up the
            # validation by putting a <div> inside of a <p>
            # we actually replace the <p> in its entirety.
            # We do not allow the marker inside a header as that
            # would cause an endless loop of placing a new TOC
            # inside previously generated TOC.
            if marker in c.text and not is_header:
                for i, child in enumerate(p):
                    if child is c:
                        p[i] = div
                        break

            if is_header:
                tag_level = int(c.tag[-1])

                # Climb back towards the outermost list.  list_stack[0] is
                # the div and list_stack[1] the top-level <ul>; neither may
                # be popped, or the next append would hit an empty stack (or
                # put an <li> directly inside the div).
                while tag_level < level and len(list_stack) > 2:
                    list_stack.pop()
                    level -= 1
                if tag_level < level:
                    # This header is shallower than the first one seen:
                    # keep it in the top-level list and re-base the level.
                    level = tag_level

                if tag_level > level:
                    newlist = etree.Element("ul")
                    # Compare against None explicitly; truth-testing an
                    # Element reflects its child count and is deprecated.
                    if last_li is not None:
                        last_li.append(newlist)
                    else:
                        list_stack[-1].append(newlist)
                    list_stack.append(newlist)
                    if level == 0:
                        level = tag_level
                    else:
                        # Skipped levels only nest one list deeper.
                        level += 1

                # Do not override pre-existing ids
                if "id" not in c.attrib:
                    anchor_id = self._unique_id(c.text, used_ids)
                    c.attrib["id"] = anchor_id
                else:
                    anchor_id = c.attrib["id"]

                # List item link, to be inserted into the toc div
                last_li = etree.Element("li")
                link = etree.SubElement(last_li, "a")
                link.text = c.text
                link.attrib["href"] = '#' + anchor_id

                if int(self.config["anchorlink"][0]):
                    # Move the header text into a link pointing at itself.
                    anchor = etree.SubElement(c, "a")
                    anchor.text = c.text
                    anchor.attrib["href"] = "#" + anchor_id
                    anchor.attrib["class"] = "toclink"
                    c.text = ""

                list_stack[-1].append(last_li)
|
106 |
|
| d624305 by Waylan Limberg at 2008-11-21 |
107 |
class TocExtension(markdown.Extension):
    """Markdown extension that installs the table-of-contents treeprocessor."""

    def __init__(self, configs):
        """Set the default settings and apply the overrides in ``configs``.

        ``configs`` may be an iterable of ``(key, value)`` pairs or a dict;
        ``None`` is treated as "no overrides".  Each override is applied
        through ``self.setConfig``.
        """
        self.config = {
            "marker": ["[TOC]",
                       "Text to find and replace with Table of Contents -"
                       "Defaults to \"[TOC]\""],
            "slugify": [self.slugify,
                        "Function to generate anchors based on header text-"
                        "Defaults to a built in slugify function."],
            "title": [None,
                      "Title to insert into TOC <div> - "
                      "Defaults to None"],
            "anchorlink": [0,
                           "1 if header should be a self link - "
                           "Defaults to 0"],
        }

        if configs is None:
            configs = ()
        elif isinstance(configs, dict):
            # Iterating a dict yields only its keys, which cannot be
            # unpacked into (key, value); use its items instead.
            configs = configs.items()

        for key, value in configs:
            self.setConfig(key, value)

    # This is exactly the same as Django's slugify
    def slugify(self, value):
        """ Slugify a string, to make it URL friendly.

        ``value`` must be a text (unicode) string.  Characters without an
        ASCII decomposition are dropped, everything except word characters,
        whitespace and hyphens is removed, the result is stripped and
        lower-cased, and each run of whitespace/hyphens becomes one hyphen.
        """
        import unicodedata
        ascii_bytes = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
        # Decode back to text so the str regexes below work on Python 3 as
        # well; on Python 2 this yields a unicode object, as before.
        value = ascii_bytes.decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        return re.sub(r'[-\s]+', '-', value)

    def extendMarkdown(self, md, md_globals):
        """Register the TOC treeprocessor with ``md`` under the name "toc".

        The processor shares this extension's ``config`` mapping and is
        added at the "_begin" position of ``md.treeprocessors``.
        """
        tocext = TocTreeprocessor(md)
        tocext.config = self.config
        md.treeprocessors.add("toc", tocext, "_begin")
| e4396d6 by Yuri Takhteyev at 2008-10-17 |
137 |
|
| d624305 by Waylan Limberg at 2008-11-21 |
138 |
def makeExtension(configs=None):
    """Create the table-of-contents extension.

    ``configs`` holds the setting overrides handed to ``TocExtension``.  The
    default is ``None`` rather than a shared mutable ``{}``; a fresh empty
    dict is substituted so the extension receives the same value as before.
    """
    if configs is None:
        configs = {}
    return TocExtension(configs=configs)