1
import re
2
import inlinepatterns
3
import util
4
import odict
5
6
7
def build_treeprocessors(md_instance, **kwargs):
    """Build the default treeprocessors for Markdown.

    Keyword arguments:

    * md_instance: the Markdown instance handed to every treeprocessor.
    * kwargs: accepted for interface compatibility; not used here.

    Returns: an ``odict.OrderedDict`` holding the "inline" processor
    followed by the "prettify" processor, in that insertion order.

    """
    treeprocessors = odict.OrderedDict()
    treeprocessors["inline"] = InlineProcessor(md_instance)
    treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
    return treeprocessors
13
14
15
# ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere so
# that isString() does not raise NameError on Python 3.
try:
    _STRING_TYPES = basestring
except NameError:
    _STRING_TYPES = str


def isString(s):
    """Check whether ``s`` is a plain (non-atomic) string.

    Returns True only when ``s`` is a string that is *not* an instance of
    ``util.AtomicString``; returns False for AtomicString instances and for
    every non-string object.

    """
    # The string check comes first so non-string objects are rejected
    # without consulting util.AtomicString; the result is unchanged.
    return (isinstance(s, _STRING_TYPES)
            and not isinstance(s, util.AtomicString))
20
21
22
class Processor(object):
    """Common base class that optionally stores the Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Remember ``markdown_instance`` as ``self.markdown`` when given.

        The attribute is only set for a truthy ``markdown_instance``; when
        the argument is omitted (or falsy) no ``markdown`` attribute is
        created on the instance.

        """
        if markdown_instance:
            self.markdown = markdown_instance
26
27
28
class Treeprocessor(Processor):
    """
    Treeprocessors are run on the ElementTree object before serialization.

    Each Treeprocessor implements a "run" method that takes a pointer to an
    ElementTree, modifies it as necessary and returns an ElementTree
    object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Subclasses of Treeprocessor should implement a `run` method, which
        takes a root ElementTree. This method can return another ElementTree
        object, and the existing root ElementTree will be replaced, or it can
        modify the current tree and return None.

        The base implementation does nothing and returns None.
        """
        pass
47
48
49
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    Matched inline content is swapped for placeholder strings while the
    patterns run; the nodes produced by the patterns are kept in
    ``self.stashed_nodes`` and are spliced back into the tree afterwards.
    """

    def __init__(self, md):
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = (4 + len(self.__placeholder_prefix)
                                     + len(self.__placeholder_suffix))
        # Captures the four digit id embedded in a placeholder.
        self.__placeholder_re = re.compile(
            util.INLINE_PLACEHOLDER % r'([0-9]{4})')
        self.markdown = md
        # run() resets this for every document; initialised here so the
        # attribute always exists.
        self.stashed_nodes = {}

    def __makePlaceholder(self, type):
        """ Generate a placeholder.

        Keyword arguments:

        * type: pattern type; currently unused.

        Returns: tuple of (placeholder string, four digit id string).

        """
        # The id is the current stash size, zero padded to four digits to
        # line up with the ``[0-9]{4}`` group of the placeholder regex.
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When no placeholder is found, returns (None, index + 1).

        """
        # NOTE(review): search() may match a placeholder that starts after
        # ``index`` rather than exactly at it.
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        else:
            return None, index + 1

    def __stashNode(self, node, type):
        """ Add node to stash and return the placeholder standing for it. """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. An AtomicString is returned
        untouched.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.markdown.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.markdown.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # A pattern that matched is tried again on the updated
                # data; only a miss moves on to the next pattern.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            # list(node) replaces Element.getchildren(), which no longer
            # exists in Python 3.9+.
            pos = list(node).index(subnode)
            node.remove(subnode)
        else:
            pos = 0

        # Inserting the reversed list at a fixed position keeps the new
        # children in their original relative order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        Plain text found between placeholders is attached to ``parent.text``
        or to the tail of the most recently collected element.

        """
        def linkText(text):
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                nodeId, phEndIndex = self.__findPlaceholder(data, index)

                if nodeId in self.stashed_nodes:
                    node = self.stashed_nodes.get(nodeId)

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if not isString(node):  # it's Element
                        for child in [node] + list(node):
                            if child.tail:
                                if child.tail.strip():
                                    self.__processElementText(node, child,
                                                              False)
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
                    else:  # it's just a string
                        linkText(node)
                        startIndex = phEndIndex
                        continue

                    startIndex = phEndIndex
                    result.append(node)

                else:  # wrong placeholder
                    # Keep the prefix as literal text and resume the scan
                    # right after it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of (data, matched, startIndex) where data is the
        string with placeholders instead of ElementTree elements, matched
        tells whether the pattern matched, and startIndex is the index the
        next attempt with the same pattern should start from.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The handler produced nothing: leave data alone and retry
            # from the start of the last regex group.
            return (data, True,
                    len(leftData) + match.span(len(match.groups()))[0])

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(child.text,
                                                         patternIndex + 1)
                    if child.tail:
                        child.tail = self.__handleInline(child.tail,
                                                         patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            for child in list(currElement):
                if child.text and not isinstance(child.text,
                                                 util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    stack += lst
                    insertQueue.append((child, lst))

                if len(child):
                    stack.append(child)

            # The generated children are always inserted; only the
            # attribute handling depends on ``enable_attributes``.
            # Guarding the whole loop would drop every inline element
            # whenever attributes are disabled.
            for element, lst in insertQueue:
                if self.markdown.enable_attributes:
                    if element.text:
                        element.text = inlinepatterns.handleAttributes(
                            element.text, element)
                i = 0
                for newChild in lst:
                    if self.markdown.enable_attributes:
                        # Processing attributes
                        if newChild.tail:
                            newChild.tail = inlinepatterns.handleAttributes(
                                newChild.tail, element)
                        if newChild.text:
                            newChild.text = inlinepatterns.handleAttributes(
                                newChild.text, newChild)
                    element.insert(i, newChild)
                    i += 1
        return tree
317
318
319
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """ Recursively add linebreaks to ElementTree children.

        For a block level element other than 'code' and 'pre', missing or
        whitespace-only text is replaced by a newline when the first child
        is block level too, and every block level child is processed
        recursively.  For any element, a missing or whitespace-only tail is
        replaced by a newline.

        """
        i = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = i
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # Applies to block level and other elements alike.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """ Add linebreaks to ElementTree root object. """

        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        # Element.getiterator() is gone in Python 3.9+; prefer iter() and
        # fall back only when it is not available.
        iterate = getattr(root, 'iter', None) or root.getiterator
        for br in iterate('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail