### This file is part of KoFooBot and is licensed under BSD-license according to the ###
### LICENSE file in the base directory. ###
### Code in this file is contributed by: ###
### Krister Svanlund <krister.svanlund gmail.com> ###

### Feedparser module from http://feedparser.org/ ###
| 7 |
|
| 8 |
import feedparser |
| 9 |
import time, calendar |
| 10 |
import re |
| 11 |
|
| 12 |
#### |
| 13 |
# Module header starts here |
| 14 |
## |
| 15 |
|
| 16 |
# Default settings contributed by this module.
#   feed_channel -- empty by default; presumably the channel posts are sent to
#                   (exposed as bot.settings.rss_feed_channel -- confirm).
#   feed_streams -- empty by default; presumably the stream -> last-update map
#                   (exposed as bot.settings.rss_feed_streams -- confirm).
rss_settings = {
    'feed_channel': "",
    'feed_streams': {},
}

# Names this module declares as requirements (presumably resolved by the
# bot's module loader -- confirm).
rss_requires = ['webhelpers']

# Timing values for update_rss; presumably a polling delay and a countdown
# maintained by the bot core -- confirm against the loader.
update_rss_delay = 30
update_rss_time_left = 0
| 23 |
|
| 24 |
def init_rss(bot, server, sender = None):
    """Normalise the stored feed streams when the module is initialised.

    If ``bot.settings.rss_feed_streams`` is a list (or a list subclass) it is
    replaced by a dict that maps every listed stream to ``0``.  Any other
    value is left untouched.

    ``server`` and ``sender`` are accepted but not used here.

    Returns True.
    """
    streams = bot.settings.rss_feed_streams
    # isinstance() also covers list subclasses, which ``type(x) is list`` missed.
    if isinstance(streams, list):
        bot.settings.rss_feed_streams = dict.fromkeys(streams, 0)
    return True
| 28 |
|
| 29 |
#def unload_rss(bot, server, sender = None): |
| 30 |
# return True |
| 31 |
|
| 32 |
# Metadata describing the 'rss' command: an (empty) description and long
# help text, a list of (usage, description) pairs, and the 'public' and
# 'level' values (presumably visibility and required access level as
# interpreted by the bot core -- confirm).
do_rss_command = {
    'description': "",
    'long help': """\
""",
    'arguments': [
        ("get queue count", ""),
        ("clear queue|feeds", ""),
        ("add|remove <stream> [<stream2> [..]]", ""),
    ],
    'public': False,
    'level': 70,
}

# Module-wide queue of feed entries that are waiting to be published.
new_posts = []
| 42 |
|
| 43 |
def do_rss(bot, server, sender, target, args):
    """Dispatch the 'rss' command to the handler named by its first argument.

    ``args[0]`` selects the sub-command ("get", "clear", "add", "remove" or
    "del"); the remaining arguments are handed to the matching ``do_rss_*``
    function.  A missing or unknown sub-command is reported through
    ``bot.respond``.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'rss'.")
        return

    action = args[0]
    remaining = args[1:]
    if action == "get":
        do_rss_get(bot, server, sender, target, remaining)
    elif action == "clear":
        do_rss_clear(bot, server, sender, target, remaining)
    elif action == "add":
        do_rss_add(bot, server, sender, target, remaining)
    elif action in ("remove", "del"):
        do_rss_del(bot, server, sender, target, remaining)
    else:
        bot.respond(server, sender, target, "'%s' is not a acceptable argument." % action)
| 57 |
|
| 58 |
def _rss_epoch(stamp, default=0):
    """Return ``stamp`` as epoch seconds, or ``default`` if it is missing or 0."""
    if not stamp:
        return default
    if isinstance(stamp, (tuple, list)):
        # time.struct_time is a tuple subclass; calendar.timegm reads fields 0-5.
        return calendar.timegm(tuple(stamp))
    return stamp


def update_rss(bot, server):
    """Look for updates in streams and publish them.

    Every stream in ``bot.settings.rss_feed_streams`` is parsed with
    feedparser.  Entries whose update/publish time is newer than the value
    stored for the stream are queued in ``new_posts`` and the stored value is
    moved forward to the feed's own update time.  Afterwards at most three
    queued posts are sent to ``bot.settings.rss_feed_channel`` through
    ``publish_post_in_channel``; the rest stay queued for the next call.

    All timestamps are handled as integer epoch seconds (UTC).  Stored values
    that are still time tuples are converted on the fly.
    """
    streams = bot.settings.rss_feed_streams
    now = calendar.timegm(time.gmtime())

    # Iterate over a snapshot because the stored values are rewritten below.
    for stream, stored in list(streams.items()):
        ### Never include the whole feed. If last update at epoch (=0) start from now. ###
        last_update = _rss_epoch(stored, now)
        if not stored:
            print(" + Last update at 0 for '%s', changed to now." % stream)
        # Persist the normalised value; otherwise a stream that starts at 0
        # is reset to "now" on every poll and can never yield a post.
        streams[stream] = last_update

        print(" + Parse stream '%s'." % stream)
        data = feedparser.parse(stream)
        if not data.feed:
            print(" + Invalid stream.")
            continue

        # A missing (or None) feed timestamp counts as "updated just now".
        feed_updated_at = _rss_epoch(data.feed.get('updated_parsed'), now)
        if feed_updated_at <= last_update:
            print(" + No updates.")
            continue

        ### A valid stream is returned. ###
        print(" + Got feed data for '%s'." % data.feed.get('title', '>No title<'))
        if not data.entries:
            print(" - No entries.")
        # Walk the entries in reverse feed order; each new one goes to the
        # front of the queue and posts are popped from the back, so entries
        # are published in the reverse of the order the feed lists them.
        for entry in reversed(data.entries):
            entry_updated_at = max(_rss_epoch(entry.get('updated_parsed')),
                                   _rss_epoch(entry.get('published_parsed')))
            if entry_updated_at > last_update:
                print(" + Add post '%s' to new posts." % entry.get('title', ">No title<"))
                entry.feed_title = data.feed.get('title', 'No title')
                new_posts.insert(0, entry)
        streams[stream] = feed_updated_at

    # Publish at most three posts per call.
    for _ in range(3):
        if not new_posts:
            break
        publish_post_in_channel(bot, server, bot.settings.rss_feed_channel, new_posts.pop())
    if new_posts:
        print(" + %d posts left for update." % len(new_posts))
| 97 |
|
| 98 |
## |
| 99 |
# Module headers end here |
| 100 |
#### |
| 101 |
|
| 102 |
def do_rss_clear(bot, server, sender, target, args):
    """Handle 'rss clear': empty the post queue or drop every feed.

    ``args[0]`` must be "queue" (discard all queued posts) or "feeds"
    (replace ``bot.settings.rss_feed_streams`` with an empty dict, which
    requires the literal confirmation "YES" as ``args[1]``).  The outcome is
    reported through ``bot.respond``.
    """
    global new_posts

    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'clear'.")
        return

    what = args[0]
    if what == "queue":
        removed = len(new_posts)
        new_posts = []
        bot.respond(server, sender, target, "%d messages has been removed from queue." % removed)
        print(" + Cleared RSS post queue.")
    elif what == "feeds":
        confirmed = len(args) > 1 and args[1] == "YES"
        if not confirmed:
            bot.respond(server, sender, target, "You have to write 'clear feeds YES' for this command to be executed.")
            return
        bot.settings.rss_feed_streams = {}
        bot.respond(server, sender, target, "All feeds has been removed.")
        print(" + Cleared all RSS feeds.")
    else:
        bot.respond(server, sender, target, "'%s' is not an accepted argument." % what)
| 121 |
|
| 122 |
def do_rss_add(bot, server, sender, target, args):
    """Handle 'rss add': register one or more streams.

    Every stream in ``args`` that is not yet present in
    ``bot.settings.rss_feed_streams`` is added with the current time as its
    last-update value, stored as integer epoch seconds (UTC) so that it can be
    compared with the epoch values computed from feed timestamps.  A stream
    that is already registered is reported and skipped; the remaining
    arguments are still processed.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'add'.")
        return

    streams = bot.settings.rss_feed_streams
    for new_stream in args:
        if new_stream in streams:
            bot.respond(server, sender, target, "This stream has already been added.")
            print(" + Stream already in stream list.")
            continue
        streams[new_stream] = calendar.timegm(time.gmtime())
        # Keep the reply short: names of 40 or more characters are cut to 30.
        shown = new_stream if len(new_stream) < 40 else new_stream[:30] + ".."
        bot.respond(server, sender, target, "Added '%s' to rss feeds." % shown)
        print(" + Added '%s' to rss feeds." % new_stream)
| 135 |
|
| 136 |
def do_rss_del(bot, server, sender, target, args):
    """Handle 'rss remove' / 'rss del': unregister one or more streams.

    Every stream in ``args`` is removed from ``bot.settings.rss_feed_streams``
    and the outcome is reported through ``bot.respond``.  A stream that is
    not registered is reported as such rather than being ignored silently.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'del'.")
        return

    streams = bot.settings.rss_feed_streams
    for del_stream in args:
        if del_stream not in streams:
            bot.respond(server, sender, target, "Stream is not in the feed list.")
            print(" - Stream '%s' is not in the feed list." % del_stream)
            continue
        try:
            del streams[del_stream]
        except Exception as e:
            # Best effort: the settings container is provided by the bot, so
            # report any failure to the user instead of aborting the command.
            bot.respond(server, sender, target, "Failed to remove stream: %s" % str(e))
            print(" - Could not remove stream: %s" % str(e))
        else:
            bot.respond(server, sender, target, "Successfully removed stream.")
            print(" + Removed stream '%s'." % del_stream)
| 149 |
|
| 150 |
def do_rss_get(bot, server, sender, target, args):
    """Handle 'rss get': report the queue size or list the feeds.

    Supported forms:
      * ``queue size`` / ``queue count`` -- number of posts waiting in
        ``new_posts`` ("count" is the spelling used in the command's
        argument help, "size" the one originally implemented);
      * ``feeds`` -- every stream in ``bot.settings.rss_feed_streams``, one
        reply per stream.

    Anything else is answered with an error reply through ``bot.respond``.
    """
    if not args:
        bot.respond(server, sender, target, "Not enough arguments for 'get'.")
        return

    subject = args[0]
    if subject == "queue":
        if not args[1:]:
            bot.respond(server, sender, target, "Not enough arguments 'queue'.")
        elif args[1] in ("size", "count"):
            bot.respond(server, sender, target, "There are %d posts in the queue." % len(new_posts))
            print(" + There are %d posts in the queue." % len(new_posts))
        else:
            bot.respond(server, sender, target, "'%s' is not a valid argument." % args[1])
    elif subject == "feeds":
        bot.respond(server, sender, target, "The feeds currently added are:")
        # Iterating the container directly yields the stream names of a dict.
        for stream in bot.settings.rss_feed_streams:
            bot.respond(server, sender, target, " %s" % stream)
        bot.respond(server, sender, target, "End of feed list.")
    else:
        bot.respond(server, sender, target, "'%s' is not a valid argument." % subject)
| 171 |
|
| 172 |
def publish_post_in_channel(bot, server, channel, post):
    """Format one feed entry as a single line and send it to ``channel``.

    The line contains the feed title (``post.feed_title``), the entry title
    in bold (wrapped in ``\\x02``), a whitespace-collapsed summary with tags
    stripped, the author when present, and the link as returned by
    ``bot.shorten_url``.  When the entry has no summary, the values of its
    ``text/*`` content parts are used instead.  Entity references are
    replaced through ``bot.substitiute_from_entity`` and the UTF-8 encoded
    result is handed to ``server.privmsg``.  The post is always printed to
    stdout; nothing is sent when ``channel`` is empty.
    """
    headline = post.get('title', 'No title')
    short_link = bot.shorten_url(post.get('link', ''))
    body = post.get('summary', '')
    parts = post.get('content', '')
    writer = post.get('author', '')

    # Fall back to the text/* content parts when there is no summary.
    if parts and not body:
        texts = [part.get('value', '') + '\n'
                 for part in parts
                 if part.get('type', '')[:5] == "text/"]
        if texts:
            body += ''.join(texts)

    if body:
        # Flatten to one line, drop tags, then collapse runs of whitespace.
        body = ' '.join(body.splitlines())
        body = ' '.join(strip_html(body).split())

    print(" + Show post\n Title: '%s'\n Link: '%s'\n Summary: '%s'\n Author: '%s'" % (headline, short_link, body, writer))

    if not channel:
        return

    if writer:
        line = "[RSS - %s] \x02%s\x02 %s - '%s' [%s]" % (post.feed_title, headline, body, writer, short_link)
    else:
        line = "[RSS - %s] \x02%s\x02 %s [%s]" % (post.feed_title, headline, body, short_link)

    line = re.sub(r'&(#?)(\d{1,5}|\w{1,8});', bot.substitiute_from_entity, line)

    server.privmsg(channel, line.encode("utf-8"))
| 196 |
|
| 197 |
def replace_html(m):
    """Return the inner text of a matched element whose tags pair up.

    ``m`` is a regex match with four groups: opening tag name, tag
    attributes, enclosed text and closing tag name.  When both tag names are
    equal the enclosed text is returned; otherwise the result is None.

    NOTE(review): the original indentation of this function was lost; this
    assumes ``return`` belonged to the matching-tag branch -- confirm.
    """
    opening, _attrs, inner, closing = m.group(1, 2, 3, 4)
    if opening != closing:
        return None
    print(" + Correct tag '%s'." % opening)
    return inner
| 205 |
|
| 206 |
def strip_html(text):
    """Return ``text`` with every tag-like ``<...>`` span replaced by a space.

    Opening, closing and self-closing tags are all matched.  The substitution
    is done in a single pass over the original string, so replacing one tag
    can never damage another match (the previous replace-in-a-loop approach
    could leave fragments such as a stray "<" behind).  ``.`` does not match
    newlines, so a tag has to sit on one line to be removed.
    """
    return re.sub(r'</?(.*?)/?>', ' ', text)