1
### This file is part of KoFooBot and is licensed under BSD-license according to the   ###
2
### LICENSE file in the base directory.                                                ###
3
### Code in this file is contributed by:                                               ###
4
###    Krister Svanlund <krister.svanlund gmail.com>                                   ###
5
6
### Feedparser module from http://feedparser.org/                                      ###
7
8
import feedparser
9
import time, calendar
10
import re
11
12
####
13
# Module header starts here
14
##
15
16
# Default values for the settings this module uses. The rest of the module
# reads them as bot.settings.rss_feed_channel / bot.settings.rss_feed_streams
# (NOTE(review): the 'rss_' prefixing is presumably done by the bot core).
rss_settings = {
    'feed_channel': "",  # channel that new posts are sent to
    'feed_streams': {},  # stream URL -> time of the last seen update
}

# NOTE(review): presumably names of modules that must be loaded before this
# one -- confirm against the module loader.
rss_requires = ['webhelpers']

# NOTE(review): presumably scheduling values read by the bot core to decide
# when update_rss() is called -- units are not visible from this file.
update_rss_delay = 30
update_rss_time_left = 0
23
24
def init_rss(bot, server, sender=None):
    """Normalise the stored feed list when the module is initialised.

    If ``bot.settings.rss_feed_streams`` is a list of stream URLs it is
    converted to a dict mapping every URL to 0; update_rss() treats 0 as
    "no update seen yet". Any other value is left untouched.

    ``server`` and ``sender`` are accepted but not used here.

    Always returns True.
    """
    streams = bot.settings.rss_feed_streams
    # isinstance() also covers list subclasses, unlike an exact type() check.
    if isinstance(streams, list):
        bot.settings.rss_feed_streams = dict.fromkeys(streams, 0)
    return True
28
29
#def unload_rss(bot, server, sender = None):
30
#    return True
31
32
# Metadata for the 'rss' command handled by do_rss().
# NOTE(review): presumably consumed by the bot's help/permission system --
# confirm the expected shape of each entry there.
do_rss_command = {
    'description': "",
    'long help': "",
    # (usage, description) pairs. These list the sub-commands that the
    # handlers in this module accept: 'get' understands "queue size" and
    # "feeds".
    'arguments': [
        ("get queue size", ""),
        ("get feeds", ""),
        ("clear queue|feeds", ""),
        ("add|remove <stream> [<stream2> [..]]", ""),
    ],
    'public': False,
    'level': 70,
}

# Entries collected by update_rss() that have not been published yet.
# update_rss() inserts at the front and pops from the end.
new_posts = []
42
43
def do_rss(bot, server, sender, target, args):
    """Dispatch the 'rss' command to the handler for its sub-command.

    ``args[0]`` selects the handler ("get", "clear", "add", "remove" or
    "del"); the remaining arguments are passed on to it. A missing or
    unknown sub-command is reported through ``bot.respond``.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'rss'.")
        return

    action = args[0]
    if action == "get":
        do_rss_get(bot, server, sender, target, args[1:])
    elif action == "clear":
        do_rss_clear(bot, server, sender, target, args[1:])
    elif action == "add":
        do_rss_add(bot, server, sender, target, args[1:])
    elif action in ("remove", "del"):
        do_rss_del(bot, server, sender, target, args[1:])
    else:
        bot.respond(server, sender, target, "'%s' is not a acceptable argument." % action)
57
58
def _rss_epoch(stamp):
    """Return *stamp* as epoch seconds.

    Accepts a number of seconds or a UTC time tuple / ``struct_time``, so
    that values stored as time tuples can still be compared with the
    integer timestamps produced by ``calendar.timegm``.
    """
    try:
        return int(stamp)
    except (TypeError, ValueError):
        return calendar.timegm(stamp)


def update_rss(bot, server):
    """Look for updates in streams and publish them.

    For every stream in ``bot.settings.rss_feed_streams`` the feed is parsed
    with feedparser. Entries whose updated/published time is newer than the
    stored time are queued in the module-level ``new_posts`` list, and the
    stored time is moved forward to the feed's own update time.

    After all streams have been checked, at most three queued posts are sent
    to ``bot.settings.rss_feed_channel`` through publish_post_in_channel();
    the rest stay queued for a later call.
    """
    global new_posts
    epoch_start = time.gmtime(0)
    # Iterate over a snapshot: stored times are rewritten inside the loop.
    for stream, last_update in list(bot.settings.rss_feed_streams.items()):
        ### Never include the whole feed. If last update at epoch (=0) start from now. ###
        if last_update == 0:
            # Keep this as epoch seconds so it compares with timegm() below,
            # and store it: otherwise every call would restart from "now"
            # and the stream could never yield a post.
            last_update = calendar.timegm(time.gmtime())
            bot.settings.rss_feed_streams[stream] = last_update
            print(" + Last update at 0 for '%s', changed to now." % stream)
        else:
            last_update = _rss_epoch(last_update)
        print(" + Parse stream '%s'." % stream)
        data = feedparser.parse(stream)
        if not data.feed:
            print(" + Invalid stream.")
            continue
        # 'or' rather than a .get() default: a missing key and a None value
        # both fall back (NOTE(review): feedparser appears to store None for
        # dates it cannot parse -- confirm).
        feed_updated_at = calendar.timegm(
            data.feed.get('updated_parsed') or time.gmtime())
        if feed_updated_at <= last_update:
            print(" + No updates.")
            continue
        ### A valid stream is returned. ###
        print(" + Got feed data for '%s'." % data.feed.get('title', '>No title<'))
        if data.entries:
            feed_title = data.feed.get('title', 'No title')
            # Walk the entries in reverse feed order and insert each at the
            # front, so the pop() at the end of this function returns them
            # in that same reversed order.
            for entry in reversed(data.entries):
                updp = calendar.timegm(entry.get('updated_parsed') or epoch_start)
                pubp = calendar.timegm(entry.get('published_parsed') or epoch_start)
                if max(updp, pubp) > last_update:
                    print(" + Add post '%s' to new posts." % entry.get('title', ">No title<"))
                    entry.feed_title = feed_title
                    new_posts.insert(0, entry)
        else:
            print(" - No entries.")
        bot.settings.rss_feed_streams[stream] = feed_updated_at
    # Publish at most three posts per call.
    for _ in range(3):
        if not new_posts:
            break
        publish_post_in_channel(bot, server, bot.settings.rss_feed_channel, new_posts.pop())
    if new_posts:
        print(" + %d posts left for update." % len(new_posts))
97
    
98
##
99
# Module headers end here
100
####
101
102
def do_rss_clear(bot, server, sender, target, args):
    """Handle 'rss clear queue' and 'rss clear feeds YES'.

    "queue" replaces the module-level ``new_posts`` list with an empty one
    and reports how many posts were dropped. "feeds" replaces
    ``bot.settings.rss_feed_streams`` with an empty dict, but only when
    followed by the literal confirmation "YES". All feedback goes through
    ``bot.respond``.
    """
    global new_posts
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'clear'.")
        return

    what = args[0]
    if what == "queue":
        post_count = len(new_posts)
        new_posts = []
        bot.respond(server, sender, target, "%d messages has been removed from queue." % post_count)
        print(" + Cleared RSS post queue.")
    elif what == "feeds":
        # Removing every feed is destructive, so require an explicit "YES".
        if len(args) > 1 and args[1] == "YES":
            bot.settings.rss_feed_streams = {}
            bot.respond(server, sender, target, "All feeds has been removed.")
            print(" + Cleared all RSS feeds.")
        else:
            bot.respond(server, sender, target, "You have to write 'clear feeds YES' for this command to be executed.")
    else:
        bot.respond(server, sender, target, "'%s' is not an accepted argument." % what)
121
122
def do_rss_add(bot, server, sender, target, args):
    """Handle 'rss add <stream> [<stream2> ..]'.

    Every stream in ``args`` that is not already present is added to
    ``bot.settings.rss_feed_streams`` with the current time as its last
    update, so only posts that appear after this point are published. A
    stream that is already present is reported and skipped; the remaining
    streams are still processed.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'add'.")
        return

    for new_stream in args:
        if new_stream in bot.settings.rss_feed_streams:
            bot.respond(server, sender, target, "This stream has already been added.")
            print(" + Stream already in stream list.")
            # Skip only this stream; the others in the command are still added.
            continue
        # Store epoch seconds: update_rss() compares this value with
        # calendar.timegm() results, which are plain integers.
        bot.settings.rss_feed_streams[new_stream] = calendar.timegm(time.gmtime())
        # Shorten long URLs in the reply (40+ characters -> first 30 + "..").
        if len(new_stream) < 40:
            shown = new_stream
        else:
            shown = new_stream[:30] + ".."
        bot.respond(server, sender, target, "Added '%s' to rss feeds." % shown)
        print(" + Added '%s' to rss feeds." % new_stream)
135
136
def do_rss_del(bot, server, sender, target, args):
    """Handle 'rss remove|del <stream> [<stream2> ..]'.

    Each stream in ``args`` is removed from ``bot.settings.rss_feed_streams``.
    The outcome for every stream (removed, failed, or not present) is
    reported through ``bot.respond``.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'del'.")
        return

    for del_stream in args:
        if del_stream not in bot.settings.rss_feed_streams:
            # Tell the user instead of silently ignoring an unknown stream.
            bot.respond(server, sender, target, "Stream '%s' is not in the feed list." % del_stream)
            print(" - Stream '%s' is not in the feed list." % del_stream)
            continue
        try:
            del bot.settings.rss_feed_streams[del_stream]
        except Exception as e:
            # Broad on purpose: the settings container is provided by the
            # bot, so the failure modes of the deletion are not known here.
            bot.respond(server, sender, target, "Failed to remove stream: %s" % str(e))
            print(" - Could not remove stream: %s" % str(e))
        else:
            bot.respond(server, sender, target, "Successfully removed stream.")
            print(" + Removed stream '%s'." % del_stream)
149
150
def do_rss_get(bot, server, sender, target, args):
    """Handle 'rss get queue size|count' and 'rss get feeds'.

    "queue size" (or its alias "queue count", the spelling used in the
    command's usage text) reports how many posts are waiting in the
    module-level ``new_posts`` list. "feeds" lists every stream in
    ``bot.settings.rss_feed_streams``, one response per stream. All output
    goes through ``bot.respond``.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'get'.")
        return

    subject = args[0]
    if subject == "queue":
        if not args[1:]:
            bot.respond(server, sender, target, "Not enough arguments 'queue'.")
        elif args[1] in ("size", "count"):
            report = "There are %d posts in the queue." % len(new_posts)
            bot.respond(server, sender, target, report)
            print(" + " + report)
        else:
            bot.respond(server, sender, target, "'%s' is not a valid argument." % args[1])
    elif subject == "feeds":
        bot.respond(server, sender, target, "The feeds currently added are:")
        for stream in bot.settings.rss_feed_streams:
            bot.respond(server, sender, target, "  %s" % stream)
        bot.respond(server, sender, target, "End of feed list.")
    else:
        bot.respond(server, sender, target, "'%s' is not a valid argument." % subject)
171
172
def publish_post_in_channel(bot, server, channel, post):
    """Format a feed entry as one line and send it to *channel*.

    ``post`` is read with ``.get`` for 'title', 'link', 'summary', 'content'
    and 'author', and must carry a ``feed_title`` attribute (update_rss()
    sets it). When the entry has no summary, one is assembled from the
    ``text/*`` parts of its content. HTML tags are removed from the summary
    and whitespace is collapsed. The link is passed through
    ``bot.shorten_url`` and HTML entities in the final line are replaced via
    ``bot.substitiute_from_entity``.

    The post is always printed to stdout; it is only sent, UTF-8 encoded,
    through ``server.privmsg`` when *channel* is non-empty.
    """
    title = post.get('title', 'No title')
    link = bot.shorten_url(post.get('link', ''))
    summary = post.get('summary', '')
    content = post.get('content', '')
    author = post.get('author', '')
    if content and not summary:
        # Fall back to the textual content parts, one per line.
        summary = ''.join(
            part.get('value', '') + '\n'
            for part in content
            if part.get('type', '').startswith("text/"))
    if summary:
        # Flatten to a single line, drop tags, then collapse whitespace runs.
        summary = ' '.join(summary.splitlines())
        summary = ' '.join(strip_html(summary).split())
    print(" + Show post\n  Title: '%s'\n  Link: '%s'\n  Summary: '%s'\n  Author: '%s'" % (title, link, summary, author))
    if not channel:
        return

    if author:
        entry_string = "[RSS - %s] \x02%s\x02 %s - '%s' [%s]" % (post.feed_title, title, summary, author, link)
    else:
        entry_string = "[RSS - %s] \x02%s\x02 %s [%s]" % (post.feed_title, title, summary, link)

    # Replace HTML entities (&name; / &#123;) using the bot's helper.
    entry_string = re.sub(r'&(#?)(\d{1,5}|\w{1,8});', bot.substitiute_from_entity, entry_string)

    server.privmsg(channel, entry_string.encode("utf-8"))
196
197
def replace_html(m):
    """Return the inner text (group 3) of a matched HTML element.

    ``m`` is a regex match with four groups: opening tag name, tag
    attributes, inner text and closing tag name. A note is printed when the
    opening and closing tag names agree; the inner text is returned either
    way.
    """
    tag = m.group(1)
    if tag == m.group(4):
        print(" + Correct tag '%s'." % tag)
    return m.group(3)
205
206
def strip_html(text):
    """Return *text* with every ``<...>`` tag replaced by a single space.

    Opening, closing and self-closing tags are all matched. The tag body
    cannot span a newline, because ``.`` does not match one. Substitution
    is done in one pass, so replacing one tag can never alter the text of
    another match.
    """
    return re.sub(r'</?.*?/?>', ' ', text)