#!/usr/bin/python

"""
CodeHilite Extension for Python-Markdown
========================================

Adds code/syntax highlighting to standard Python-Markdown code blocks.

Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).

Project website: <http://www.freewisdom.org/project/python-markdown/CodeHilite>
Contact: markdown@freewisdom.org

License: BSD (see ../docs/LICENSE for details)

Dependencies:
* [Python 2.3+](http://python.org/)
* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
* [Pygments](http://pygments.org/)

"""
22
23
import re

import markdown

# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
    """
    Determine the language of a piece of source code and highlight it.

    Highlighting is delegated to Pygments when it can be imported; otherwise
    the source is only HTML-escaped and wrapped in ``<div><pre>`` markup.

    Basic Usage:
        >>> code = CodeHilite(src = 'some text')
        >>> html = code.hilite()

    * src: Source string (``hilite`` calls string methods on it).

    * linenos: (Boolean) Turn line numbering 'on' or 'off' (off by default).

    * css_class: Set class name of wrapper div ('codehilite' by default).

    * lang: Language name handed to Pygments. When None (the default) it is
      read from a shebang / ``:::`` marker on the first line of ``src``.

    * style: Passed to the Pygments HtmlFormatter as ``style``.

    * noclasses: Passed to the Pygments HtmlFormatter as ``noclasses``.

    * tab_length: Number of spaces a tab is expanded to when line numbers
      are generated without Pygments.

    Low Level Usage:
        >>> code = CodeHilite()
        >>> code.src = 'some text' # A string.
        >>> code.linenos = True  # True or False; Turns line numbering on or off.
        >>> html = code.hilite()

    """

    # Language marker, matched only at the very start of the first line so
    # that code which merely contains '::' (e.g. 'std::cout') is not mistaken
    # for a marker and stripped.
    _LANG_MARKER = re.compile(r'''
        (?:(?:::+)|(?P<shebang>[#]!))   # Shebang or 2 or more colons.
        (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
        (?P<lang>[\w+-]*)               # The language
        ''', re.VERBOSE)

    def __init__(self, src=None, linenos=False, css_class="codehilite",
                 lang=None, style='default', noclasses=False, tab_length=4):
        self.src = src
        self.lang = lang
        self.linenos = linenos
        self.css_class = css_class
        self.style = style
        self.noclasses = noclasses
        self.tab_length = tab_length

    def hilite(self):
        """
        Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter
        with optional line numbers. The output should then be styled with css
        to your liking. No styles are applied by default - only styling hooks
        (i.e.: <span class="k">).

        Side effects: ``self.src`` is stripped of leading/trailing newlines
        and, when ``self.lang`` is None, ``_getLang`` may update ``self.lang``,
        ``self.linenos`` and ``self.src``.

        returns : A string of html.

        """
        self.src = self.src.strip('\n')

        if self.lang is None:
            self._getLang()

        try:
            from pygments import highlight
            from pygments.lexers import get_lexer_by_name, guess_lexer, \
                                        TextLexer
            from pygments.formatters import HtmlFormatter
        except ImportError:
            # Pygments is unavailable: just escape and pass through.
            txt = self._escape(self.src)
            if self.linenos:
                return self._number(txt)
            return '<div class="%s"><pre>%s</pre></div>\n' % \
                    (self.css_class, txt)

        # Lexer lookup failures are caught as ValueError: first try the
        # requested name, then let Pygments guess, finally fall back to
        # plain text.
        try:
            lexer = get_lexer_by_name(self.lang)
        except ValueError:
            try:
                lexer = guess_lexer(self.src)
            except ValueError:
                lexer = TextLexer()
        formatter = HtmlFormatter(linenos=self.linenos,
                                  cssclass=self.css_class,
                                  style=self.style,
                                  noclasses=self.noclasses)
        return highlight(self.src, lexer, formatter)

    def _escape(self, txt):
        """ Basic html escaping of &, <, > and ". """
        # '&' must be replaced first so the entities added below survive.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        return txt

    def _number(self, txt):
        """
        Use <ol> for line numbering.

        txt is expected to be already escaped. Tabs are expanded to
        ``self.tab_length`` spaces and runs of spaces are turned into
        ``&nbsp;`` sequences. The wrapper div carries ``self.css_class``.

        returns : A string of html.

        """
        # Fix Whitespace (longest runs first so shorter rules don't split them)
        txt = txt.replace('\t', ' ' * self.tab_length)
        txt = txt.replace(" " * 4, "&nbsp; &nbsp; ")
        txt = txt.replace(" " * 3, "&nbsp; &nbsp;")
        txt = txt.replace(" " * 2, "&nbsp; ")

        # Add line numbers
        items = ['\t<li>%s</li>\n' % line for line in txt.splitlines()]
        return ('<div class="%s"><pre><ol>\n' % self.css_class
                + ''.join(items)
                + '</ol></pre></div>\n')

    def _getLang(self):
        """
        Determines language of a code block from shebang lines and whether said
        line should be removed or left in place. If the shebang line contains a
        path (even a single /) then it is assumed to be a real shebang line and
        left alone. However, if no path is given (e.g.: #!python or :::python)
        then it is assumed to be a mock shebang for language identification of
        a code fragment and removed from the code block prior to processing for
        code highlighting. When a shebang (e.g.: #!python) is found, line
        numbering is turned on. When colons are found in place of a shebang
        (e.g.: :::python), line numbering is left in the current state - off
        by default.

        The marker is only recognised at the very start of the first line.
        Updates ``self.lang``, ``self.linenos`` and ``self.src`` in place.

        """
        lines = self.src.split("\n")
        m = self._LANG_MARKER.match(lines[0])
        if m:
            # The 'lang' group always takes part in a match (possibly empty).
            self.lang = m.group('lang').lower()
            if m.group('shebang'):
                # shebang exists - use line numbers
                self.linenos = True
            if not m.group('path'):
                # no path - a mock marker, drop it from the code
                lines = lines[1:]

        self.src = "\n".join(lines).strip("\n")


# ------------------ The Markdown Extension -------------------------------
class HiliteTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """ Highlight source code in code blocks. """

    def run(self, root):
        """
        Find code blocks, highlight them and store the result in htmlStash.

        Every ``<pre>`` element under ``root`` whose only child is a ``<code>``
        element is highlighted with CodeHilite. The element is then turned
        into a ``<p>`` holding the stash placeholder.

        Reads ``self.config`` (attached by CodeHiliteExtension.extendMarkdown)
        and ``self.markdown`` (presumably set by the Treeprocessor base class
        from the Markdown instance - confirm against the installed version).

        """
        # Element.getiterator()/getchildren() were removed in Python 3.9;
        # prefer iter() and fall back only on old ElementTree versions.
        find_all = getattr(root, 'iter', None) or root.getiterator
        # Snapshot the matches: the loop below mutates the tree.
        for block in list(find_all('pre')):
            if len(block) == 1 and block[0].tag == 'code':
                code = CodeHilite(block[0].text,
                            linenos=self.config['force_linenos'][0],
                            css_class=self.config['css_class'][0],
                            style=self.config['pygments_style'][0],
                            noclasses=self.config['noclasses'][0],
                            tab_length=self.markdown.tab_length)
                placeholder = self.markdown.htmlStash.store(code.hilite(),
                                                            safe=True)
                # Clear codeblock in etree instance
                block.clear()
                # Change to p element which will later
                # be removed when inserting raw html
                block.tag = 'p'
                block.text = placeholder
200
201
202
class CodeHiliteExtension(markdown.Extension):
    """ Add source code highlighting to markdown codeblocks. """

    def __init__(self, configs):
        """
        Set the default configuration and apply user overrides.

        * configs: Iterable of ``(key, value)`` pairs, or a mapping of keys to
          values. Each entry is applied with ``self.setConfig``.

        """
        # define default configs; each entry is [value, description]
        self.config = {
            'force_linenos' : [False, "Force line numbers - Default: False"],
            'css_class' : ["codehilite",
                           "Set class name for wrapper <div> - Default: codehilite"],
            'pygments_style' : ['default', 'Pygments HTML Formatter Style (Colorscheme) - Default: default'],
            'noclasses': [False, 'Use inline styles instead of CSS classes - Default false']
            }

        # A mapping must be walked through items(); iterating it directly
        # yields only keys, which would then be unpacked character by
        # character.
        if hasattr(configs, 'items'):
            configs = configs.items()

        # Override defaults with user settings
        for key, value in configs:
            self.setConfig(key, value)

    def extendMarkdown(self, md, md_globals):
        """
        Add HiliteTreeprocessor to Markdown instance.

        The treeprocessor is registered under the name "hilite" at position
        "_begin" and shares this extension's config dictionary. md_globals is
        not used.

        """
        hiliter = HiliteTreeprocessor(md)
        hiliter.config = self.config
        md.treeprocessors.add("hilite", hiliter, "_begin")

        md.registerExtension(self)
226
227
228
def makeExtension(configs=None):
    """
    Return a CodeHiliteExtension built from configs.

    * configs: Settings forwarded to CodeHiliteExtension; when omitted (or
      None) no settings are overridden.

    """
    # Avoid a shared mutable default argument.
    if configs is None:
        configs = []
    return CodeHiliteExtension(configs=configs)