1
# -*- coding: utf-8 -*-
2
import re
3
from logging import CRITICAL
4
5
import etree_loader
6
7
8
"""
CONSTANTS
=============================================================================
"""
12
13
"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""
17
18
# Names of the HTML tags that are treated as block level.
#
# The alternation is anchored at both ends so that only a complete tag name
# matches.  Without the anchors, ``match()`` accepted any name that merely
# *starts* with one of the alternatives (e.g. "param" via "p", "link" via
# "li", "track" via "tr").  The group is non-capturing, so match objects
# expose the same groups as before.
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(?:p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
    "|script|noscript|form|fieldset|iframe|math"
    "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
    "|tr|th|td)$"
)
22
# Placeholders
#
# Every placeholder is bracketed by the STX/ETX control characters.
STX = u'\u0002'  # "Start of text": marks the start of a placeholder
ETX = u'\u0003'  # "End of text": marks the end of a placeholder

# Inline placeholder template; the "%s" slot is filled in by the caller.
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX

# Stand-in token for an ampersand.
AMP_SUBSTITUTE = STX + "amp" + ETX
28
29
"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""
33
34
# Pairs of (first, last) characters delimiting the Unicode ranges listed as
# RTL/bidi scripts.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    (u'\u0590', u'\u07FF'),
    # Tifinagh
    (u'\u2D30', u'\u2D7F'),
)
40
41
# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`).  Do not import it yourself.
43
44
# Module-level handle bound once, at import time, to whatever
# etree_loader.importETree() returns (presumably the ElementTree
# implementation picked by the loader -- confirm in etree_loader).
etree = etree_loader.importETree()
45
46
"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""
50
51
52
def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    For a string ``tag`` the result of ``BLOCK_LEVEL_ELEMENTS.match(tag)`` is
    returned as-is: a match object (truthy) or ``None``.  Any other kind of
    object yields ``False``.
    """
    # Some ElementTree tags are not strings, so they can never be block level.
    if not isinstance(tag, basestring):
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
58
59
"""
MISC AUXILIARY CLASSES
=============================================================================
"""
63
64
class AtomicString(unicode):
    """A string which should not be further processed.

    Adds no behaviour of its own to ``unicode``; the subclass exists only so
    that instances can be told apart from ordinary strings.
    """
67
68
69
class Processor:
    """Base class that optionally keeps a reference to a markdown instance."""

    def __init__(self, markdown_instance=None):
        """Remember ``markdown_instance`` as ``self.markdown`` when it is truthy.

        When the argument is omitted (or otherwise falsy) the ``markdown``
        attribute is not created at all.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
73
74
75
class HtmlStash:
    """
    Stash for HTML objects that are extracted at the beginning and
    replaced with place-holders.
    """

    def __init__(self):
        """Create an empty HtmlStash."""
        self.rawHtmlBlocks = []
        self.html_counter = 0  # number of html segments stored so far

    def store(self, html, safe=False):
        """
        Save an HTML segment for later reinsertion.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : the placeholder string that needs to be inserted into
        the document in place of the segment

        """
        self.rawHtmlBlocks.append((html, safe))
        # The counter value *before* incrementing is the key of this segment.
        token = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return token

    def reset(self):
        """Discard all stored segments and restart the counter at zero."""
        self.rawHtmlBlocks = []
        self.html_counter = 0

    def get_placeholder(self, key):
        """Return the placeholder string for the integer ``key``."""
        return "%swzxhzdk:%d%s" % (STX, key, ETX)