| 1 |
# -*- coding: utf-8 -*- |
| 2 |
import re |
| 3 |
from logging import CRITICAL |
| 4 |
|
| 5 |
import etree_loader |
| 6 |
|
| 7 |
|
| 8 |
""" |
| 9 |
CONSTANTS |
| 10 |
============================================================================= |
| 11 |
""" |
| 12 |
|
| 13 |
""" |
| 14 |
Constants you might want to modify |
| 15 |
----------------------------------------------------------------------------- |
| 16 |
""" |
| 17 |
|
| 18 |
# Tag names that are treated as block level HTML.
#
# The alternation is anchored at both ends (and wrapped in a non-capturing
# group so the anchors apply to every alternative).  Without the anchors,
# ``.match()`` accepted any tag that merely *started* with one of these names,
# e.g. "link" (via "li"), "param" (via "p") or "track" (via "tr").
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(?:p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
    "|script|noscript|form|fieldset|iframe|math"
    "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
    "|tr|th|td)$")
| 22 |
# Placeholders
# STX and ETX bracket every placeholder string built in this module.
STX = u'\u0002'  # "Start of text" control character: opens a placeholder
ETX = u'\u0003'  # "End of text" control character: closes a placeholder
INLINE_PLACEHOLDER_PREFIX = u"".join((STX, "klzzwxh:"))
# Template with a single "%s" slot between the prefix and the closing ETX.
INLINE_PLACEHOLDER = u"".join((INLINE_PLACEHOLDER_PREFIX, "%s", ETX))
AMP_SUBSTITUTE = u"".join((STX, "amp", ETX))
| 28 |
|
| 29 |
""" |
| 30 |
Constants you probably do not need to change |
| 31 |
----------------------------------------------------------------------------- |
| 32 |
""" |
| 33 |
|
| 34 |
# Each entry is a (first character, last character) pair; the comments name
# the Unicode blocks that the pair spans.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    (u'\u0590', u'\u07FF'),
    # Tifinagh
    (u'\u2D30', u'\u2D7F'),
)
| 40 |
|
| 41 |
# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`). Do not import ElementTree yourself.

# Bound once, at import time, to whatever etree_loader.importETree() returns.
etree = etree_loader.importETree()
| 45 |
|
| 46 |
""" |
| 47 |
AUXILIARY GLOBAL FUNCTIONS |
| 48 |
============================================================================= |
| 49 |
""" |
| 50 |
|
| 51 |
|
| 52 |
def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    String tags are matched against ``BLOCK_LEVEL_ELEMENTS``; the result of
    that ``match()`` call (a match object, or ``None``) is returned as is.
    Anything that is not a string yields ``False``.
    """
    if not isinstance(tag, basestring):
        # Some ElementTree tags are not strings, so there is nothing to match.
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
| 58 |
|
| 59 |
""" |
| 60 |
MISC AUXILIARY CLASSES |
| 61 |
============================================================================= |
| 62 |
""" |
| 63 |
|
| 64 |
class AtomicString(unicode):
    """Marker subclass of ``unicode`` for text that should not be processed
    any further.  It adds no behaviour of its own."""
| 67 |
|
| 68 |
|
| 69 |
class Processor:
    """Object that optionally keeps a reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Store *markdown_instance* as ``self.markdown`` when one is given.

        When ``markdown_instance`` is falsy (the default is ``None``) the
        ``markdown`` attribute is not created at all.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
| 73 |
|
| 74 |
|
| 75 |
class HtmlStash:
    """
    Holds the HTML segments that are pulled out of the document early on
    and swapped for place-holders.
    """

    def __init__(self):
        """ Set up an empty stash. """
        self.rawHtmlBlocks = []
        self.html_counter = 0  # number of segments stored so far

    def store(self, html, safe=False):
        """
        Stash an HTML segment for later reinsertion and hand back the
        placeholder string that must be put into the document instead.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        entry = (html, safe)
        self.rawHtmlBlocks.append(entry)
        # The placeholder is numbered with the counter value *before* it is
        # advanced, so the first stored segment gets key 0.
        marker = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return marker

    def reset(self):
        """ Drop every stored segment and restart the counter at zero. """
        self.html_counter, self.rawHtmlBlocks = 0, []

    def get_placeholder(self, key):
        """ Build the placeholder string for the segment numbered `key`. """
        template = "%swzxhzdk:%d%s"
        return template % (STX, key, ETX)