| 1 |
from htmlentitydefs import name2codepoint as n2cp |
| 2 |
from htmlentitydefs import codepoint2name as cp2n |
| 3 |
import re |
| 4 |
|
| 5 |
#### |
| 6 |
# Module header |
| 7 |
## |
| 8 |
|
| 9 |
# Names of the helpers (functions and regexps) defined further down in this
# module.  NOTE(review): presumably the module loader attaches these to the
# bot object (do_compress_url reads bot.regex_url) -- confirm against the
# loader.  The misspelt 'substitiute_*' entries match the actual function
# names below and must stay in sync with them.
webhelpers_helpers = [
    'regex_url',
    'regex_ahref',
    'rewrite_url',
    'substitiute_from_entity',
    'substitiute_to_entity',
    'unquotestring',
    'quotestring',
    'shorten_url',
    'expand_url',
    'convert_to_irc_string',
    'convert_to_twitter_string',
]
| 20 |
|
| 21 |
# Metadata for the 'modurl' command: a description, the accepted
# sub-commands with their help texts, and the 'public' / 'level' settings.
# NOTE(review): how 'public' and 'level' are interpreted is decided by the
# bot core, not by this module -- confirm there.
do_modurl_command = dict(
    description="Modify URL's",
    arguments=[
        ("expand <url> [<url2> [..]]", "Expand a short url to a long url."),
        ("compress <url> [<url2> [..]]", "Compress a long url into a short url."),
    ],
    public=True,
    level=0,
)
| 26 |
|
| 27 |
# Short command aliases: alias name -> the full 'modurl' invocation it
# stands for.
webhelpers_alias = dict(
    curl='modurl compress',
    eurl='modurl expand',
)
| 29 |
|
| 30 |
def do_modurl(bot, server, sender, target, args):
    """Entry point of the 'modurl' command; dispatches on the first argument.

    args[0] selects the action ("compress" or "expand") and the remaining
    arguments are passed on as the urls to process.  A missing or unknown
    action is reported back through bot.respond().
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments.")
        return

    action, urls = args[0], args[1:]
    if action == "compress":
        do_compress_url(bot, server, sender, target, urls)
    elif action == "expand":
        do_expand_url(bot, server, sender, target, urls)
    else:
        # \x02 bytes are sent around the offending argument as-is
        bot.respond(server, sender, target, "\x02'%s'\x02 is not a valid argument." % action)
| 40 |
|
| 41 |
## |
| 42 |
# End of module header |
| 43 |
#### |
| 44 |
|
| 45 |
# Regexp for recognising URLs.  Accepts both http:// and https://.
# Group 1 is the run of word characters and dots after the scheme, plus a
# directly following '/' when there is one; the rest of the url (up to the
# next whitespace) is matched but not captured.
regex_url = r'https?://([\w.]+/?)\S*'
| 47 |
|
| 48 |
# Regexp for recognising html hyperlinks such as <a href="linkurl">Linkname</a>
# Group 1 is the href value, group 2 the link text.  Attribute matching is
# limited to [^>]* so that a match can never run past the end of the opening
# tag; otherwise several links on one line would be merged into one match.
regex_ahref = r'<a [^>]*?href=[\'"](.*?)[\'"][^>]*>(.*?)</a>'
| 50 |
|
| 51 |
def do_compress_url(bot, server, sender, target, args):
    """Handle 'modurl compress': answer with a short version of every url.

    Each entry of args is checked against bot.regex_url.  Matching entries
    are shortened with shorten_url() and the result is sent back; entries
    that do not match are reported as not shortenable.  With no args at all
    a "Not enough arguments." reply is sent instead.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments.")
        return

    for candidate in args:
        if not re.match(bot.regex_url, candidate):
            bot.respond(server, sender, target, "Could not shorten: %s" % candidate)
            continue
        short = shorten_url(candidate)
        # progress line on stdout
        print(" + Shortening url '%s' to '%s'." % (candidate, short))
        bot.respond(server, sender, target, "Short url is: %s" % short)
| 64 |
|
| 65 |
def do_expand_url(bot, server, sender, target, args):
    """Handle 'modurl expand': answer with the long version of every url.

    Each entry of args is checked against bot.regex_url.  Matching entries
    are expanded with expand_url() and the result is sent back; entries
    that do not match are reported as not expandable.  With no args at all
    a "Not enough arguments." reply is sent instead.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments.")
        return

    for candidate in args:
        if not re.match(bot.regex_url, candidate):
            bot.respond(server, sender, target, "Could not expand: %s" % candidate)
            continue
        expanded = expand_url(candidate)
        # progress line on stdout
        print(" + Expanding url '%s' to '%s'." % (candidate, expanded))
        bot.respond(server, sender, target, "Long url is: %s" % expanded)
| 78 |
|
| 79 |
def rewrite_url(match):
    """ [Internal] Rewrite html link to irc version.

    match must provide the link target as group 1 and the link text as
    group 2 (the layout regex_ahref produces).  When the text is itself a
    url, or equal to the target, only the target is returned; otherwise
    the result is "text [target]".
    """
    href, label = match.group(1), match.group(2)
    label_is_url = re.match(regex_url, label) is not None
    if label_is_url or label == href:
        return href
    return "%s [%s]" % (label, href)
| 87 |
|
| 88 |
def substitiute_from_entity(match):
    """ [Internal] Substitute one html-entity match with the real character.

    match must provide '#' (numeric reference) or '' (named entity) as
    group 1 and the entity body as group 2.  Decimal (&#65;) and
    hexadecimal (&#x41;) references are decoded with unichr(); named
    entities are looked up in the name2codepoint table.  Anything that
    cannot be decoded -- unknown name, malformed number, code point that
    unichr() rejects -- is returned unchanged, so one bad entity never
    aborts the surrounding re.sub().
    """
    ent = match.group(2)
    if match.group(1) == '#':
        try:
            if ent[:1].lower() == 'x':
                # hexadecimal character reference: &#xHH;
                return unichr(int(ent[1:], 16))
            return unichr(int(ent))
        except (ValueError, OverflowError):
            # not a number, or outside the range unichr() accepts
            return match.group()

    cp = n2cp.get(ent)
    if cp is None:
        return match.group()
    return unichr(cp)
| 99 |
|
| 100 |
def substitiute_to_entity(match):
    """ [Internal] Substitute one special character with its html-entity.

    Group 1 of match is the single character to convert.  When the
    codepoint2name table knows a name for it, "&name;" is returned;
    otherwise the character itself is returned via unichr().
    """
    codepoint = ord(match.group(1))
    entity_name = cp2n.get(codepoint)
    return ("&%s;" % entity_name) if entity_name else unichr(codepoint)
| 108 |
|
| 109 |
def unquotestring(string):
    """ [Internal] Make html string into real string.

    Every "&name;" / "&#number;" sequence found in string is handed to
    substitiute_from_entity() and replaced by whatever that returns.
    """
    entity_pattern = r'&(#?)(\d{1,5}|\w{1,8});'
    return re.sub(entity_pattern, substitiute_from_entity, string)
| 112 |
|
| 113 |
def quotestring(string):
    """ [Internal] Make real string into html string.

    Every character other than ascii letters, digits and ",. !@" is handed
    to substitiute_to_entity() and replaced by whatever that returns.
    """
    special_char_pattern = r'([^a-zA-Z0-9,. !@])'
    return re.sub(special_char_pattern, substitiute_to_entity, string)
| 116 |
|
| 117 |
def convert_to_irc_string(string):
    """ [Internal] Format the string for posting in IRC.

    Html entities are turned into real characters with unquotestring() and
    the result is encoded as utf-8.  Best effort: if anything goes wrong
    the error is printed and the input is returned untouched.
    """
    try:
        converted = unquotestring(string).encode("utf-8")
    except Exception as e:
        print("Convert to irc string: %s" % (e,))
        return string
    return converted
| 124 |
|
| 125 |
def convert_to_twitter_string(string):
    """ [Internal] Format the string for posting to twitter.

    Special characters are turned into html entities with quotestring().
    Best effort: if anything goes wrong the error is printed and the input
    is returned untouched.
    """
    try:
        converted = quotestring(string)
    except Exception as e:
        print("Convert to twitter string: %s" % (e,))
        return string
    return converted
| 132 |
|
| 133 |
### Cache of earlier shortened or expanded urls, filled by shorten_url ###
### and expand_url.  Layout: { short_url: long_url }                   ###
url_cache = dict()
| 136 |
|
| 137 |
def shorten_url(url):
    """ [Internal] Use is.gd to convert a link to a short version.

    Returns the short url.  When the lookup fails, or the service answers
    with something that is not a url, the original url is returned
    unchanged and nothing is cached.  Successful lookups are stored in
    url_cache as { short_url: long_url }.
    """
    # The cache is keyed by the short url, so a known long url has to be
    # found by scanning the values.
    for short, known_long in url_cache.iteritems():
        if known_long == url:
            return short

    import urllib
    apiurl = "http://is.gd/api.php?longurl="
    try:
        # Escape the url: a raw '&' or '#' in it would otherwise be read as
        # part of the API request and the link would be cut off.
        response = urllib.urlopen(apiurl + urllib.quote(url, ''))
        try:
            tinyurl = response.read().strip()
        finally:
            response.close()
    except Exception:
        # best effort: any failure falls back to the unshortened url
        return url

    if not re.match(r'https?://\S+$', tinyurl):
        # The body is not a url (for example an error text sent by the
        # service); do not hand it out or cache it as a short url.
        return url

    url_cache[tinyurl] = url
    return tinyurl
| 150 |
|
| 151 |
def expand_url(url):
    """ [Internal] Expand a url in short format.

    Returns the long url reported by the longurl.org API.  When the lookup
    fails, or the answer contains no usable url, the original url is
    returned unchanged.  A result that differs from the input is stored in
    url_cache as { short_url: long_url }.
    """
    if url in url_cache:
        return url_cache[url]

    import urllib
    apiurl = "http://api.longurl.org/v2/expand?url="
    try:
        # Escape the url: a raw '&' or '#' in it would otherwise be read as
        # part of the API request and the link would be cut off.
        response = urllib.urlopen(apiurl + urllib.quote(url, ''))
        try:
            longurl_respons = response.read()
        finally:
            response.close()
    except Exception:
        # best effort: any failure falls back to the unexpanded url
        return url

    ### longurl_respons is formated as such:                            ###
    ### <?xml version="1.0"?>                                           ###
    ### <response>                                                      ###
    ###   <long-url><![CDATA[http://www.google.com/]]></long-url>       ###
    ### </response>                                                     ###
    # Non-greedy, so the capture stops at the first ']]>' even if several
    # CDATA sections share a line.
    res = re.search(r'<!\[CDATA\[(.*?)\]\]>', longurl_respons)
    if not res or not res.group(1):
        return url

    longurl = res.group(1)
    if longurl != url:
        url_cache[url] = longurl
    return longurl