1
from htmlentitydefs import name2codepoint as n2cp
2
from htmlentitydefs import codepoint2name as cp2n
3
import re
4
5
####
# Module header
##
8
9
# Names of the module-level helpers listed for export.
# NOTE(review): presumably the bot's module loader attaches these to the bot
# object (the commands below read ``bot.regex_url``) -- confirm in the loader.
webhelpers_helpers = [
    'regex_url',
    'regex_ahref',
    'rewrite_url',
    'substitiute_from_entity',
    'substitiute_to_entity',
    'unquotestring',
    'quotestring',
    'shorten_url',
    'expand_url',
    'convert_to_irc_string',
    'convert_to_twitter_string',
]
20
21
# Metadata describing the ``modurl`` command: a description, the
# (usage, help text) pair for each sub-command, and the 'public' / 'level'
# settings.
# NOTE(review): 'public' and 'level' look like visibility / permission
# settings read by the bot core -- confirm there.
do_modurl_command = {
    'description': "Modify URL's",
    'arguments': [
        ("expand <url> [<url2> [..]]", "Expand a short url to a long url."),
        ("compress <url> [<url2> [..]]", "Compress a long url into a short url."),
    ],
    'public': True,
    'level': 0,
}
26
27
# Short command names mapped to the full ``modurl`` invocation they stand for.
webhelpers_alias = dict(
    curl='modurl compress',
    eurl='modurl expand',
)
29
30
def do_modurl(bot, server, sender, target, args):
    """Dispatch the ``modurl`` command to its sub-command handler.

    ``args[0]`` selects the action ("compress" or "expand"); the remaining
    items are handed to the matching handler. A missing or unknown action is
    reported through ``bot.respond``.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments.")
        return

    action = args[0]
    if action == "compress":
        do_compress_url(bot, server, sender, target, args[1:])
    elif action == "expand":
        do_expand_url(bot, server, sender, target, args[1:])
    else:
        bot.respond(server, sender, target, "\x02'%s'\x02 is not a valid argument." % action)
40
41
##
# End of module header
####
44
45
# Regexp for recognising URLs, both plain http and https.
# Group 1 captures the host part ([\w.]+) plus an optional trailing slash;
# whatever non-whitespace follows is matched but not captured.
regex_url = r'https?:\/\/([\w.]+\/?)\S*'
47
48
# Regexp for recognising html hyperlinks such as <a href="linkurl">Linkname</a>
# Group 1 is the href value, group 2 the link text.
# The attributes after the href are matched with [^>]* rather than a greedy
# .* so the match stops at the end of the opening tag; otherwise two links on
# one line are fused into a single match (first url, last link text).
regex_ahref = r'<a .*?href=[\'"](.*?)[\'"][^>]*>(.*?)</a>'
50
51
def do_compress_url(bot, server, sender, target, args):
    """Shorten every url in ``args`` and report each result through the bot.

    An argument that does not match ``bot.regex_url`` is answered with a
    "Could not shorten" message; an empty argument list is answered with
    "Not enough arguments.". Each successful shortening is also printed to
    stdout.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments.")
        return

    for candidate in args:
        if not re.match(bot.regex_url, candidate):
            bot.respond(server, sender, target, "Could not shorten: %s" % candidate)
            continue
        short = shorten_url(candidate)
        print(" + Shortening url '%s' to '%s'." % (candidate, short))
        bot.respond(server, sender, target, "Short url is: %s" % short)
64
65
def do_expand_url(bot, server, sender, target, args):
    """Expand every short url in ``args`` and report each long url through the bot.

    An argument that does not match ``bot.regex_url`` is answered with a
    "Could not expand" message; an empty argument list is answered with
    "Not enough arguments.". Each successful expansion is also printed to
    stdout.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments.")
        return

    for candidate in args:
        if not re.match(bot.regex_url, candidate):
            bot.respond(server, sender, target, "Could not expand: %s" % candidate)
            continue
        expanded = expand_url(candidate)
        print(" + Expanding url '%s' to '%s'." % (candidate, expanded))
        bot.respond(server, sender, target, "Long url is: %s" % expanded)
78
79
def rewrite_url(match):
    """ [Internal] Rewrite html link to irc version.

    ``match`` must expose the link url as group 1 and the link text as
    group 2. When the text is itself a url (or equals the link url) only the
    url is returned; otherwise the result is "text [url]".
    """
    link, label = match.group(1), match.group(2)
    label_adds_nothing = re.match(regex_url, label) or label == link
    if label_adds_nothing:
        return link
    return "%s [%s]" % (label, link)
87
88
def substitiute_from_entity(match):
    """ [Internal] Substitute one html-entity match with its real character.

    ``match`` is expected to have two groups: group 1 is '#' for a numeric
    character reference (empty otherwise) and group 2 is the reference body.

    * ``&#NNN;`` and ``&#xHHH;`` are decoded to the character with that
      code point.
    * ``&name;`` is looked up in the html entity table.
    * Anything that cannot be decoded (unknown name, malformed or
      out-of-range number) is returned unchanged.
    """
    ent = match.group(2)
    if match.group(1) == '#':
        # Numeric reference: a leading x/X marks a hexadecimal value.
        if ent[:1] in ('x', 'X'):
            base, digits = 16, ent[1:]
        else:
            base, digits = 10, ent
        try:
            return unichr(int(digits, base))
        except (ValueError, OverflowError):
            # Not a number, or not a valid code point: leave the text as is
            # instead of aborting the whole substitution.
            return match.group()

    cp = n2cp.get(ent)
    if cp is None:
        return match.group()
    return unichr(cp)
99
100
def substitiute_to_entity(match):
    """ [Internal] Substitute one special character with its html-entity.

    Group 1 of ``match`` holds the character. When the entity table has a
    name for its code point the result is "&name;", otherwise the character
    itself is returned.
    """
    codepoint = ord(match.group(1))
    entity_name = cp2n.get(codepoint)
    return "&%s;" % entity_name if entity_name else unichr(codepoint)
108
109
def unquotestring(string):
    """ [Internal] Make html string into real string.

    Every ``&name;`` / ``&#number;`` style entity found in ``string`` is
    passed to ``substitiute_from_entity`` and replaced by its result.
    """
    entity_pattern = r'&(#?)(\d{1,5}|\w{1,8});'
    return re.sub(entity_pattern, substitiute_from_entity, string)
112
            
113
def quotestring(string):
    """ [Internal] Make real string into html string.

    Every character other than ASCII letters, digits and ``,. !@`` is passed
    to ``substitiute_to_entity`` and replaced by its result.
    """
    special_char_pattern = r'([^a-zA-Z0-9,. !@])'
    return re.sub(special_char_pattern, substitiute_to_entity, string)
116
117
def convert_to_irc_string(string):
    """ [Internal] Format the string for posting in IRC.

    Html entities are decoded and the result is encoded as UTF-8. If any
    step fails the error is printed and ``string`` is returned unchanged.
    """
    try:
        decoded = unquotestring(string)
        return decoded.encode("utf-8")
    except Exception as e:
        print("Convert to irc string: %s" % (e,))
        return string
124
125
def convert_to_twitter_string(string):
    """ [Internal] Format the string for posting to twitter.

    Special characters are replaced by html entities. If the conversion
    fails the error is printed and ``string`` is returned unchanged.
    """
    try:
        quoted = quotestring(string)
        return quoted
    except Exception as e:
        print("Convert to twitter string: %s" % (e,))
        return string
132
133
### Cache of earlier shortened or expanded URLs. ###
### Layout: { short_url: long_url }
url_cache = dict()
136
    
137
def shorten_url(url):
    """ [Internal] Use is.gd to convert a link to a short version.

    Returns the cached short url when one is already known for ``url``;
    otherwise asks the is.gd API and caches the answer. Shortening is
    best-effort: if the request fails, or the answer does not look like a
    url, ``url`` itself is returned and nothing is cached.
    """
    # The cache maps short -> long, so finding the short form is a scan.
    for key, value in url_cache.items():
        if value == url:
            return key
    try:
        import urllib
        apiurl = "http://is.gd/api.php?longurl="
        # Percent-encode the target so its own '?', '&' and '#' are not read
        # as part of the API request.
        response = urllib.urlopen(apiurl + urllib.quote(url, safe=''))
        try:
            tinyurl = response.read().strip()
        finally:
            response.close()
    except Exception:
        # Best-effort: network or encoding trouble falls back to the input.
        # (KeyboardInterrupt / SystemExit are deliberately not swallowed.)
        return url
    # Anything that is not a url (presumably an error text from the service)
    # must neither be cached nor handed back as a short url.
    if not tinyurl.startswith("http"):
        return url
    url_cache[tinyurl] = url
    return tinyurl
150
151
def expand_url(url):
    """ [Internal] Expand a url in short format.

    Returns the cached long url when ``url`` is a known short url; otherwise
    asks the longurl.org API. Expansion is best-effort: if the request
    fails, or no long url can be found in the answer, ``url`` itself is
    returned. A result is cached only when it differs from ``url``.
    """
    if url in url_cache:
        return url_cache[url]
    try:
        import urllib
        apiurl = "http://api.longurl.org/v2/expand?url="
        # Percent-encode the target so its own '?', '&' and '#' are not read
        # as part of the API request.
        response = urllib.urlopen(apiurl + urllib.quote(url, safe=''))
        try:
            longurl_respons = response.read()
        finally:
            response.close()
    except Exception:
        # Best-effort: network or encoding trouble falls back to the input.
        # (KeyboardInterrupt / SystemExit are deliberately not swallowed.)
        return url
    ### longurl_respons is formatted as such:                       ###
    ### <?xml version="1.0"?>                                       ###
    ###   <response>                                                ###
    ###     <long-url><![CDATA[http://www.google.com/]]></long-url> ###
    ###   </response>                                               ###
    # Anchor on the <long-url> element and match lazily so that any other
    # CDATA section in the answer can not be mistaken for the long url.
    res = re.search(r'<long-url>\s*<!\[CDATA\[(.*?)\]\]>\s*</long-url>',
                    longurl_respons)
    if not res:
        return url
    longurl = res.group(1).strip()
    if not longurl:
        return url
    if longurl != url:
        url_cache[url] = longurl
    return longurl