1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
u"""
4
5
TED talks subs
6
--------------
7
8
Download and convert the subtitles of a TED talk
9
10
Original script: http://estebanordano.com.ar/ted-talks-download-subtitles/
11
12
A few minor improvements by Aurélien Bompard <http://aurelien.bompard.org>
13
14
:License:
15
    MIT
16
"""
17
18
import optparse
19
import urllib
20
import sys
21
import re
22
23
import simplejson
24
25
def getFormatedTime(intvalue):
    """Format a duration given in milliseconds as ``HH:MM:SS,mmm``.

    This is the timestamp form written on the ``-->`` lines of the
    generated subtitle file.  Hours are not wrapped, so values of 100
    hours or more simply use more than two digits.

    :param intvalue: duration in milliseconds (an integer).
    :return: the formatted timestamp string.
    """
    # Peel off one unit at a time: milliseconds, then seconds, then minutes;
    # whatever is left over is the hour count.
    remaining, mils = divmod(intvalue, 1000)
    remaining, segs = divmod(remaining, 60)
    hors, mins = divmod(remaining, 60)
    return "%02d:%02d:%02d,%03d" % (hors, mins, segs, mils)
31
32
def availableSubs(subs):
    """Extract the language codes advertised in the ``languages`` value.

    The text is scanned for every occurrence of ``LanguageCode``; for each
    one, the first ``%22...%22`` token found after it (containing no
    upper-case ASCII letter) is taken as a language code.

    The scan is iterative, so long inputs neither hit the recursion limit
    nor get re-sliced for every marker.  A marker that is not followed by
    any ``%22...%22`` token ends the scan instead of raising
    ``AttributeError``.

    :param subs: the raw (still URL-encoded) languages string.
    :return: list of language codes, in order of appearance; empty when
             no marker is present.
    """
    marker = "LanguageCode"
    code_re = re.compile("%22([^A-Z]+)%22")
    codes = []
    pos = subs.find(marker)
    while pos != -1:
        pos += len(marker)
        match = code_re.search(subs, pos)
        if match is None:
            # No quoted token after this marker means there is none after
            # any later marker either, so the scan is complete.
            break
        codes.append(match.group(1))
        # Resume right after the marker (not after the match), so that a
        # marker located inside the matched region is still honoured.
        pos = subs.find(marker, pos)
    return codes
38
39
def getVideoParameters(urldirection):
    """Download a talk page and parse its ``flashVars = {...}`` block.

    Each line of the block is expected to look like ``name:value,``.  Tabs
    are removed, everything from the last comma on is dropped, and the
    line is split on its first colon.  Values are returned as raw text
    (quotes, if any, are kept).

    :param urldirection: URL of the talk page to fetch.
    :return: dict mapping each name to its raw value text, or ``None``
             when the page contains no ``flashVars`` block.
    """
    response = urllib.urlopen(urldirection)
    try:
        ht = response.read()
    finally:
        response.close()
    found = re.search('flashVars = {\n([^}]+)}', ht)
    if not found:
        return None
    resultado = {}
    for entry in found.group(1).split('\n'):
        entry = entry.replace('\t', '')
        comma = entry.rfind(',')
        # Only cut when a comma is present: rfind() returns -1 otherwise,
        # and slicing with -1 would chop the last character off an entry
        # that has no trailing comma (typically the final one).
        if comma != -1:
            entry = entry[:comma]
        colon = entry.find(':')
        if colon != -1:
            resultado[entry[:colon]] = entry[colon + 1:]
    return resultado
56
57
def downloadSub(idtalk, lang, timeIntro):
    """Download the captions of one talk in one language and write them out.

    The captions are fetched as JSON and written as numbered entries: an
    index line, a ``start --> end`` line, the caption text and a blank
    line.  Every timestamp is shifted by ``timeIntro``.

    Reads the module-level ``options``: ``options.debug`` enables a
    progress message and ``options.output`` overrides the default file
    name ``<idtalk>_<lang>.srt``.

    :param idtalk: identifier of the talk.
    :param lang: language code of the subtitles to fetch.
    :param timeIntro: offset in milliseconds added to every caption time.
    """
    if options.debug:
        print("Downloading subtitles for language %s"%lang)
    response = urllib.urlopen('http://www.ted.com/talks/subtitles/id/%s/lang/%s'%(idtalk, lang))
    try:
        # Validate the payload before creating the output file, so a bad
        # response does not leave an empty file behind.
        captions = simplejson.load(response)['captions']
    finally:
        response.close()
    filename = options.output
    if not filename:
        filename = '%s_%s.srt' % (idtalk,lang)
    salida = open(filename, 'w')
    try:
        for index, caption in enumerate(captions):
            start = timeIntro + caption['startTime']
            end = start + caption['duration']
            # Entries are numbered starting from 1.
            salida.write("%d\n" % (index + 1))
            salida.write("%s --> %s\n" % (getFormatedTime(start), getFormatedTime(end)))
            salida.write("%s\n\n" % (caption['content'].encode('utf-8')))
    finally:
        # Close the file even when a caption is malformed or a write fails.
        salida.close()
73
74
def main(idtalk):
    """Look up a talk, choose the subtitle language(s) and download them.

    Reads the module-level ``options``:

    * ``options.debug`` prints a progress message;
    * ``options.list`` prints the available language codes and exits 0;
    * ``options.lang`` selects a language; if it is not available the
      list of available languages is written to stderr and the program
      exits with status 1.

    Without ``options.lang`` the user is prompted until they enter either
    an available language code or ``all``.

    :param idtalk: identifier of the talk.
    """
    if options.debug:
        print("Loading information about TED talk number %s..."%idtalk)
    vidpar = getVideoParameters('http://www.ted.com/talks/view/id/%s'%idtalk)
    if not vidpar:
        sys.stderr.write("There was a problem fetching information about that TED Talk\n")
        sys.exit(1)
    availables = availableSubs(vidpar['languages'])
    if options.list:
        print("\n".join(availables))
        sys.exit(0)
    if options.lang and options.lang not in availables:
        sys.stderr.write("Sorry, this language is not available. Available languages:\n")
        if not availables:
            sys.stderr.write("(none)\n")
        else:
            sys.stderr.write("\n".join(availables) + "\n")
        sys.exit(1)
    if options.lang:
        lang = options.lang
    else:
        print("Download all subtitles (write 'all' when prompted) or only one (specify wich)?")
        lang = raw_input()
        # "all" must be accepted here as well; otherwise the prompt would
        # reject it forever and the download-everything branch below could
        # never be reached.
        while lang != "all" and lang not in availables:
            print("We're sorry, the only available languages are:")
            print("\t".join(availables))
            lang = raw_input()
    if lang == "all":
        # NOTE(review): downloadSub() writes to options.output when it is
        # set, so combining "all" with -o makes every language overwrite
        # the same file.
        wanted = availables
    else:
        wanted = [lang]
    for code in wanted:
        downloadSub(idtalk, code, int(vidpar['introDuration']))
107
108
def parse_opts():
    """Parse the command line.

    Exactly one positional argument is required and it must be accepted
    by ``int()``; any violation is reported through the parser's
    ``error()`` method.

    :return: a ``(options, talkid)`` tuple, where ``options`` carries the
             ``lang``, ``output``, ``list`` and ``debug`` attributes and
             ``talkid`` is the positional argument, still as a string.
    """
    cli = optparse.OptionParser(
        usage="usage: %prog [-l lang] [-o output] [-t] talkid")

    # Options that take a value.
    value_options = (
        ("-l", "--lang", "lang", "Use this language"),
        ("-o", "--output", "output", "Write the subtitle in this file"),
    )
    for short, long_, dest, text in value_options:
        cli.add_option(short, long_, dest=dest, help=text)

    # Boolean switches.
    flag_options = (
        ("-t", "--list-only", "list", "List available subtitles and exit"),
        ("-d", "--debug", "debug", "Print debug information"),
    )
    for short, long_, dest, text in flag_options:
        cli.add_option(short, long_, dest=dest, action="store_true",
                       help=text)

    opts, positional = cli.parse_args()
    if len(positional) != 1:
        cli.error("You must give a talk ID")
    talk = positional[0]
    try:
        # Validation only: the identifier is returned as the original string.
        int(talk)
    except ValueError:
        cli.error("The argument must be the numeric unique talk ID")
    return opts, talk
127
128
if __name__ == "__main__":
    # ``options`` is bound at module level on purpose: main() and
    # downloadSub() read it as a global.  No ``global`` statement is
    # needed for that at module scope.
    options, talkid = parse_opts()
    main(talkid)