| a18bfc9 by Aurélien Bompard at 2010-04-11 |
1 |
#!/usr/bin/env python |
|
2 |
# -*- coding: utf-8 -*- |
|
3 |
u""" |
|
4 |
Reads an RSS/Atom feed and converts the enclosures to AVI. |
|
5 |
|
|
6 |
Dependencies: |
|
7 |
|
|
8 |
- ``flvstreamer`` for the RTMP streams |
|
9 |
- ``mimms`` for the MMS streams |
|
10 |
- ``ffmpeg`` to get the dimensions of the videos |
|
11 |
- ``mencoder`` to do the conversion |
| fcbb169 by Aurélien Bompard at 2010-05-09 |
12 |
- ``tedtalksubs.py`` to download TED talks subtitles (in this repo) |
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
13 |
|
| 3ecf069 by Aurélien Bompard at 2011-11-05 |
14 |
:Authors: |
|
15 |
Aurélien Bompard <aurelien@bompard.org> <http://aurelien.bompard.org> |
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
16 |
|
| 3ecf069 by Aurélien Bompard at 2011-11-05 |
17 |
:License: |
|
18 |
GNU GPL v3 or later |
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
19 |
""" |
|
20 |
|
|
21 |
import os |
|
22 |
import sys |
|
23 |
import urllib2 |
| 45222ed by Aurélien Bompard at 2012-01-07 |
24 |
import httplib |
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
25 |
import glob |
|
26 |
import subprocess |
|
27 |
import re |
| 2536794 by Aurélien Bompard at 2010-05-23 |
28 |
import tempfile |
|
29 |
import atexit |
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
30 |
from urlparse import urlparse |
|
31 |
from optparse import OptionParser |
| 26929f9 by Aurélien Bompard at 2011-11-06 |
32 |
from stat import S_IRUSR, S_IWUSR, S_IROTH, S_IRGRP |
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
33 |
#from xml.etree import ElementTree as ET |
|
34 |
from pprint import pprint |
|
35 |
|
| fcbb169 by Aurélien Bompard at 2010-05-09 |
36 |
from lxml import etree as ET |
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
37 |
import urlgrabber |
|
38 |
import urlgrabber.progress |
|
39 |
|
| 079fdc8 by Aurélien Bompard at 2010-05-09 |
40 |
# Default for --exclude-tags: comma-separated list of feed <category> values
# whose items are dropped from the output feed
EXCLUDE_TAGS = ""
# Default maximum width and height of the encoded video (-W / -H options)
WIDTH = 800
HEIGHT = 480
# MIME type assumed when no other source provides one
MIME_DEFAULT = "video/x-msvideo"
# File extension given to the transcoded videos
EXTENSION = "avi"
| 3ecf069 by Aurélien Bompard at 2011-11-05 |
48 |
|
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
49 |
|
|
50 |
def get_options():
    """Parse and validate the command line.

    Returns:
        The ``(options, args)`` tuple produced by ``optparse``.  ``args`` is
        always empty because stray positional arguments are rejected, and
        ``options.exclude_tags`` is normalized to a list of tag names.

    Invalid input is reported through ``parser.error()``, which prints the
    message and exits the program.
    """
    usage = "usage: %prog -i input_feed -u URL -o output_feed [-d directory]"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", "--input", dest="input",
                      help="Process this file")
    parser.add_option("-o", "--output", dest="output",
                      help="Write the RSS in this file")
    parser.add_option("-d", "--directory", dest="directory",
                      help="Write the converted videos in this directory")
    parser.add_option("-u", "--url", dest="url",
                      help="The external URL of the video folder")
    parser.add_option("-W", "--width", dest="width", type="int",
                      default=WIDTH,
                      help="Width of the converted video [default: %default]")
    parser.add_option("-H", "--height", dest="height", type="int",
                      default=HEIGHT,
                      help="Height of the converted video "
                           "[default: %default]")
    parser.add_option("-m", "--max", dest="max", type="int", default=10,
                      help="Only convert that many videos, drop the rest "
                           "[default: %default]")
    parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
                      default=False, help="Don't show progress bars")
    parser.add_option("-k", "--keep", dest="keep", action="store_true",
                      default=False, help="Don't remove original files")
    parser.add_option("--exclude-tags", dest="exclude_tags",
                      default=EXCLUDE_TAGS, help="Drop videos tagged with a "
                      "tag in this comma-separated list [default: %default]")
    parser.add_option("--subtitles", dest="subtitles", metavar="LANG",
                      help="Download subtitles in this language")
    parser.add_option("--old-ffmpeg", dest="oldffmpeg", action="store_true",
                      help="FFMpeg is old (like on Debian Lenny)")
    options, args = parser.parse_args()

    if args:
        parser.error("illegal arguments: %s" % ", ".join(args))
    if not options.input:
        parser.error("I need a file to process")
    # "-" means "read the feed from stdin": there is no file to look for, so
    # the existence check must not reject it.
    if options.input != "-" and not os.path.exists(options.input):
        parser.error("The file to process does not exist")
    if not options.output:
        parser.error("I need a file to write to")
    if not options.url:
        parser.error("I need an external URL")
    if not options.directory and options.input != "-":
        # Default to the directory holding the input feed.
        options.directory = os.path.abspath(os.path.dirname(options.input))
    if not options.directory:
        parser.error("I need a directory for the videos")
    if not options.quiet and "TERM" not in os.environ:
        options.quiet = True  # Not in a terminal, be quiet anyway
    if hasattr(options.exclude_tags, "split"):
        # Still the raw comma-separated string: turn it into a list and drop
        # the empty names left by an empty value or a stray comma.
        stripped = [tag.strip() for tag in options.exclude_tags.split(",")]
        options.exclude_tags = [tag for tag in stripped if tag]
    return options, args
|
99 |
|
| f571082 by Aurélien Bompard at 2010-07-22 |
100 |
|
|
101 |
class PodcastError(Exception):
    """Root of the errors raised while handling a feed item."""


class NotAPodcastError(PodcastError):
    """The feed item has no enclosure to work on."""


class TranscodingError(PodcastError):
    """Converting a downloaded file failed."""


class DownloadingError(PodcastError):
    """Fetching an enclosure failed."""
|
105 |
|
|
106 |
|
|
107 |
class Podcast(object):
    """One feed ``<item>`` whose ``<enclosure>`` is downloaded and transcoded.

    The constructor resolves the enclosure URL, works out the on-disk paths
    of the downloaded and encoded files and, when requested, fetches the
    subtitles.  ``process()`` then downloads, transcodes and rewrites the
    item in place.

    The methods read the module-level ``options`` (parsed command line) and
    ``flux_xml`` (parsed feed) globals that ``main()`` sets up.
    """

    # extension -> MIME type cache shared by all instances; filled lazily
    # from /etc/mime.types through the ``mimetypes`` property.
    _mimetypes = {}

    def __init__(self, item):
        """Bind to ``item`` and compute URL, paths and subtitles.

        Raises:
            NotAPodcastError: if the item has no ``<enclosure>`` child or the
                enclosure has no ``url``.
        """
        self.item = item
        self.enclosure = item.find("enclosure")
        if not ET.iselement(self.enclosure):
            raise NotAPodcastError()
        # Order matters: _get_url() may set self.content_type, which
        # _get_content_type() reuses, and the paths build on each other.
        self.url = self._get_url()
        self.path_downloaded = self._get_downloaded_path()
        self.content_type = self._get_content_type()
        self.path_encoded = self._get_encoded_path()
        self.subs = self._get_subtitles()
        self.video_info = None
        self.size = None

    def _get_url(self):
        """Return the enclosure URL with HTTP redirects resolved.

        As a side effect, stores the ``Content-Type`` response header in
        ``self.content_type`` when the URL could be opened.
        """
        url = self.enclosure.get("url")
        if not url:
            # An enclosure without a URL cannot be downloaded at all.
            raise NotAPodcastError()
        if url.startswith(options.url):
            return url  # already points at our own converted copy
        # Resolve redirects
        try:
            remote_file = urllib2.urlopen(url)
            try:
                url = remote_file.geturl()
                self.content_type = remote_file.info().get("Content-Type")
            finally:
                remote_file.close()
        except (urllib2.HTTPError, httplib.HTTPException) as e:
            print("Failed downloading %s" % url)
            print(e)
        except urllib2.URLError:
            print("Probably RTMP or MMS: %s" % url)
        return url

    def _get_content_type(self):
        """Return the MIME type of the enclosure.

        Sources, in order: the HTTP header recorded by ``_get_url()``, the
        enclosure attributes, the file extension, then ``MIME_DEFAULT``.
        """
        if getattr(self, "content_type", None):
            return self.content_type  # already set by _get_url()
        # "content-type" is the attribute historically read here; "type" is
        # the one update_item() writes, so accept it as well.
        for attribute in ("content-type", "type"):
            declared = self.enclosure.attrib.get(attribute)
            if declared:
                return declared
        for extension, mimetype in self.mimetypes.items():
            if self.path_downloaded.endswith("." + extension):
                return mimetype
        return MIME_DEFAULT

    def _get_downloaded_path(self):
        """Return the local path the enclosure is downloaded to."""
        filename = os.path.basename(urlparse(self.url).path)
        # Drop a query string in case the URL parser left it in the path.
        filename = filename.split("?", 1)[0]
        return os.path.join(options.directory, filename)

    def _get_encoded_path(self):
        """Return the local path of the file published in the output feed."""
        if self.content_type.startswith("audio/"):
            # the file won't be transcoded
            return self.path_downloaded
        # Join the bare file name only: path_downloaded already includes
        # options.directory, and joining the full path again duplicated a
        # relative directory ("dir/dir/file.avi").
        stem = os.path.splitext(os.path.basename(self.path_downloaded))[0]
        return os.path.join(options.directory, "%s.%s" % (stem, EXTENSION))

    def _get_subtitles(self):
        """Download the subtitles and return the path of the ``.srt`` file.

        Returns ``None`` when subtitles were not requested, the feed is not
        one of the TED feeds, or the download failed.
        """
        if not options.subtitles:
            return None
        if flux_xml.findtext("channel/title") not in ("TEDTalks (video)",
                                                      "TEDTalks (hd)"):
            return None
        # The talk id is the second ":"-separated field of the guid.
        guid_fields = (self.item.findtext("guid") or "").split(":")
        if len(guid_fields) < 2:
            sys.stderr.write("Failed to download subtitles. Message:\n")
            sys.stderr.write("no talk id in the item guid\n")
            return None
        talkid = guid_fields[1]
        # Create the temporary file only once it is known to be needed.
        subfile, subfile_path = tempfile.mkstemp(
            prefix="podcast-transcode-sub-", suffix=".srt")
        os.close(subfile)
        atexit.register(os.remove, subfile_path)
        try:
            subdl = subprocess.Popen(
                ["tedtalksubs.py", "-l", options.subtitles,
                 "-o", subfile_path, talkid],
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except OSError as e:
            # The helper script is missing or not executable.
            sys.stderr.write("Failed to download subtitles. Message:\n")
            sys.stderr.write("%s\n" % e)
            return None
        out = subdl.communicate()[0]
        if subdl.returncode != 0:
            sys.stderr.write("Failed to download subtitles. Message:\n")
            sys.stderr.write("%s\n" % out)
            return None
        return subfile_path

    @staticmethod
    def _parse_mime_types(lines):
        """Map each extension to its audio/video MIME type.

        ``lines`` uses the /etc/mime.types layout: a MIME type followed by
        any number of whitespace-separated extensions.
        """
        found = {}
        for line in lines:
            fields = line.split("#", 1)[0].split()
            if len(fields) < 2:
                continue  # blank, comment, or type without extension
            mimetype = fields[0]
            if not mimetype.startswith(("video/", "audio/")):
                continue
            # Register every extension of the line, not only the first one.
            for extension in fields[1:]:
                found[extension] = mimetype
        return found

    def _get_mimetypes(self):
        """Return the shared extension -> MIME type mapping, loading it once."""
        if self._mimetypes:
            return self._mimetypes
        try:
            with open("/etc/mime.types") as mime_file:
                self._mimetypes.update(self._parse_mime_types(mime_file))
        except IOError:
            # No system MIME database: _get_content_type() will fall back
            # to MIME_DEFAULT.
            pass
        return self._mimetypes
    mimetypes = property(_get_mimetypes)

    def is_already_transcoded(self):
        """Tell whether the enclosure already points below ``options.url``."""
        return self.url.startswith(options.url)

    def process(self):
        """Download and encode the enclosure if needed, then update the item.

        Raises:
            DownloadingError: if the download fails.
            TranscodingError: if the conversion fails.
        """
        if os.path.exists(self.path_encoded):
            print("Already downloaded/encoded: %s" % self.path_encoded)
        else:
            self.download()
            self.encode_video()
        self.url = "%s/%s" % (options.url,
                              os.path.basename(self.path_encoded))
        self.size = int(os.stat(self.path_encoded).st_size)
        self.update_item()

    def update_item(self):
        """Write the new URL, size and MIME type into the feed item."""
        self.enclosure.set("url", self.url)
        self.enclosure.set("length", str(self.size))
        self.enclosure.set("type", self.content_type)
        fb = self.item.find(
            "{http://rssnamespace.org/feedburner/ext/1.0}origEnclosureLink")
        if ET.iselement(fb):
            fb.text = self.url
        mediacontent = self.item.find(
            "{http://search.yahoo.com/mrss/}content")
        if ET.iselement(mediacontent):
            mediacontent.set("url", self.url)
            mediacontent.set("fileSize", str(self.size))
            mediacontent.set("type", self.content_type)

    def download(self):
        """Fetch the enclosure to ``path_downloaded`` unless already there.

        Raises:
            DownloadingError: if the download fails.
        """
        if os.path.exists(self.path_downloaded):
            print("Already downloaded: %s" % self.path_downloaded)
            return
        if self.url.startswith("rtmp://"):
            self.download_rtmp()
        elif self.url.startswith("mms://"):
            self.download_mms()
        elif self.content_type == "video/x-ms-asf":
            self.download_asf()
        else:
            if options.quiet:
                progress = urlgrabber.progress.BaseMeter()
            else:
                progress = urlgrabber.progress.TextMeter(fo=sys.stdout)
            print("Downloading %s to %s" % (self.url, self.path_downloaded))
            try:
                urlgrabber.urlgrab(self.url, filename=self.path_downloaded,
                                   reget='simple', progress_obj=progress)
            except urlgrabber.grabber.URLGrabError as e:
                raise DownloadingError("Error downloading %s: %s"
                                       % (self.url, e))

    def _stream_rtmp(self):
        """Run ``flvstreamer`` once and return its exit code."""
        command = ["flvstreamer", "-r", self.url, "-o", self.path_downloaded]
        if options.quiet:
            command.append("-q")
        if os.path.exists(self.path_downloaded):
            command.insert(1, "--resume")
        print("Streaming %s to %s" % (self.url, self.path_downloaded))
        try:
            return subprocess.call(command)
        except KeyboardInterrupt:
            return 1

    def download_rtmp(self):
        """Fetch an RTMP stream, resuming incomplete downloads.

        Raises:
            DownloadingError: if ``flvstreamer`` ends with a non-zero code.
        """
        max_tries = 10
        retcode = self._stream_rtmp()
        # flvstreamer returns 2 if the download is incomplete
        current_try = 1
        while retcode == 2:
            print("Trying again...")
            retcode = self._stream_rtmp()
            current_try += 1
            if current_try > max_tries:
                print("Too many tries, aborting.")
                break
        if retcode != 0:
            if os.path.exists(self.path_downloaded):
                os.remove(self.path_downloaded)
            raise DownloadingError("Error code: %s" % retcode)

    def download_mms(self, url=None):
        """Fetch an MMS stream with ``mimms``.

        Args:
            url: stream to fetch; defaults to ``self.url``.

        Raises:
            DownloadingError: if ``mimms`` ends with a non-zero code, whether
                or not it left a partial file behind.
        """
        if url is None:
            url = self.url
        command = ["mimms", url, self.path_downloaded]
        if options.quiet:
            command.append("-q")
        print("Streaming %s to %s" % (url, self.path_downloaded))
        try:
            retcode = subprocess.call(command)
        except KeyboardInterrupt:
            retcode = 1
        if retcode != 0:
            if os.path.exists(self.path_downloaded):
                os.remove(self.path_downloaded)
            raise DownloadingError("Error code: %s" % retcode)

    def download_asf(self):
        """Fetch the MMS stream referenced by an ASF playlist.

        Raises:
            DownloadingError: if the playlist cannot be read, names no MMS
                stream, or the stream download fails.
        """
        try:
            remote_file = urllib2.urlopen(self.url)
            try:
                mms_xml = remote_file.read()
            finally:
                remote_file.close()
        except (urllib2.URLError, httplib.HTTPException) as e:
            raise DownloadingError("Error downloading %s: %s"
                                   % (self.url, e))
        mms_match = re.search('"(mms://[^"]*)"', mms_xml)
        if mms_match is None:
            raise DownloadingError("No MMS stream found in %s" % self.url)
        # The playlist is XML, so ampersands in the URL are escaped.
        mms_url = mms_match.group(1).replace("&amp;", "&")
        return self.download_mms(mms_url)

    def encode_video(self):
        """Transcode the downloaded file into ``path_encoded`` when needed.

        Raises:
            TranscodingError: if the conversion fails.
        """
        if self.path_encoded == self.path_downloaded:
            print("No transcoding required")
            return
        transcoded_video = self._transcode_video()
        os.rename(transcoded_video, self.path_encoded)
        # mkstemp() creates the file readable by its owner only.
        os.chmod(self.path_encoded,
                 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)  # 644

    def _transcode_video(self):
        """Run ``mencoder`` and return the path of the temporary result.

        Raises:
            TranscodingError: if ``mencoder`` ends with a non-zero code.
        """
        width, height = self.get_video_info()
        transcoded_video_file, transcoded_video = tempfile.mkstemp(
            prefix="podcast-transcode-", suffix=".avi",
            dir=options.directory)
        os.close(transcoded_video_file)

        def rm_if_exists(f):
            if os.path.exists(f):
                os.remove(f)
        if not options.keep:
            atexit.register(rm_if_exists, transcoded_video)
        command = ["mencoder", "-oac", "mp3lame",
                   "-ovc", "lavc", "-lavcopts", "vbitrate=600",
                   "-of", "avi", "-mc", "0", self.path_downloaded,
                   "-o", transcoded_video]
        if height and width:
            # Only shrink: scale on the first dimension that is too large.
            if int(height) > options.height:
                command.extend(["-vf", "scale=-3:%d" % options.height])
            elif int(width) > options.width:
                command.extend(["-vf", "scale=%d:-3" % options.width])
        if options.quiet:
            command.append("-quiet")
        if self.subs:
            command.extend(["-sub", self.subs, "-subfont-text-scale", "4"])
            if not os.path.exists(
                    os.path.expanduser("~/.mplayer/subfont.ttf")):
                command.extend(["-fontconfig", "-font", "DejaVu Sans"])
        print(" ".join(command))
        print("Encoding %s to %s" % (self.path_downloaded, transcoded_video))
        try:
            retcode = subprocess.call(command)
        except KeyboardInterrupt:
            retcode = 1
        if retcode != 0:
            if os.path.exists(transcoded_video):
                os.remove(transcoded_video)
            raise TranscodingError("Error code: %s" % retcode)
        self.content_type = "video/x-msvideo"
        return transcoded_video

    def get_video_info(self):
        """Return the ``(width, height)`` strings reported by ``ffmpeg -i``.

        Both values are ``None`` when the output holds no recognizable video
        stream line.  The result is cached in ``self.video_info``.
        """
        if self.video_info is not None:
            return self.video_info
        ffmpeg_cmd = subprocess.Popen(
            ["ffmpeg", "-i", self.path_downloaded],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # communicate() also waits for the child, so it is reaped.
        output = ffmpeg_cmd.communicate()[0]
        info_match = re.search(
            r"Stream .*: Video: (\w+), \w+, (\d+)x(\d+)", output)
        if info_match:
            self.video_info = (info_match.group(2), info_match.group(3))
        else:
            self.video_info = (None, None)
        return self.video_info
|
367 |
|
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
368 |
|
|
369 |
def cleanup(items):
    """Delete the files of the video directory that the feed no longer uses.

    Every file of ``options.directory`` is removed unless it ends in
    ``.xml`` or is the downloaded or encoded file of one of ``items``.
    Nothing is removed when ``options.keep`` is set.

    Args:
        items: the feed items whose files must be preserved.
    """
    if options.keep:
        # Checked first: building the Podcast objects below is costly and
        # pointless when nothing is going to be removed.
        return

    feed_podcasts = set()
    for item in items:
        try:
            podcast = Podcast(item)
        except NotAPodcastError:
            continue
        feed_podcasts.add(os.path.basename(podcast.path_downloaded))
        feed_podcasts.add(os.path.basename(podcast.path_encoded))

    for filepath in glob.glob(os.path.join(options.directory, "*")):
        if filepath.endswith(".xml"):
            continue  # keep the RSS feed
        if os.path.isdir(filepath):
            continue  # os.remove() cannot delete a directory
        filename = os.path.basename(filepath)
        if filename not in feed_podcasts:
            print("Removing old file %s" % filename)
            os.remove(filepath)
|
390 |
|
|
391 |
|
|
392 |
def handle_item(item):
    """Download and convert the enclosure of one feed item.

    Items without an enclosure and items already converted are left alone.
    When subtitles were requested but none could be fetched, the item is
    removed from the feed.  Download and transcoding errors are printed and
    the item is left as it was.
    """
    try:
        podcast = Podcast(item)
    except NotAPodcastError:
        return

    if podcast.is_already_transcoded():
        print("Already converted: %s" % podcast.url)
        return

    subtitles_missing = options.subtitles and not podcast.subs
    if subtitles_missing:
        print("No subtitles for %s, skipping." % item.findtext("guid"))
        flux_xml.find("channel").remove(item)
        return

    try:
        podcast.process()
    except (DownloadingError, TranscodingError) as e:
        print(e)
| f571082 by Aurélien Bompard at 2010-07-22 |
414 |
|
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
415 |
|
|
416 |
def to_skip(item):
    """Tell whether ``item`` carries one of the excluded ``<category>`` tags."""
    excluded = options.exclude_tags
    return any(category.text in excluded
               for category in item.findall("category"))
|
422 |
|
|
423 |
def main():
    """Convert the enclosures of the input feed and write the output feed.

    Sets the module-level ``options`` and ``flux_xml`` globals that the other
    functions read.
    """
    global options, flux_xml
    options, _unused_args = get_options()
    if options.input == "-":
        options.input = sys.stdin
    flux_xml = ET.parse(options.input)
    items = flux_xml.findall("channel/item")

    # tag skipping: excluded items leave both the feed and the work list
    for candidate in list(items):
        if not to_skip(candidate):
            continue
        flux_xml.find("channel").remove(candidate)
        items.remove(candidate)

    # convert the first ``options.max`` items, drop the others from the feed
    for position, entry in enumerate(list(items)):
        if position >= options.max:
            flux_xml.find("channel").remove(entry)
        else:
            handle_item(entry)

    flux_xml.write(options.output)
    cleanup(items[:options.max])
| 7581434 by Aurélien Bompard at 2011-04-03 |
444 |
|
| a18bfc9 by Aurélien Bompard at 2010-04-11 |
445 |
|
|
446 |
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
448 |
|