Commit 3353db981b5e100e367b60fb2d4c356825161a10

Encoding problems; save things to a file too
failburn.py
(15 / 10)
  
2222cookie_jar = cookielib.CookieJar()
2323opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
2424
25output = file('results.txt')
25output = file('results.txt', 'wb')
2626
2727for source in sources:
2828 print source + ':'
29 output.write(source + '\n')
30
2931 content = csv.reader(open(source + '.csv'), delimiter=';')
3032
3133 # The data have two headers
3535 content.next()
3636 for row in content:
3737 print row[0], row[1]
38 #print >>output, row[0], row[1]
38 output.write('%s %s\n' % (row[0], row[1]))
3939
4040 # remove ".", "/" and "-". Do not translate any other characters.
4141 cnpj_clean = row[0].translate(None, './-')
8787
8888 soup = BeautifulSoup(data)
8989 rows = soup.findAll('tr')
90
9091 # 0: Empty
9192 # 1: Donator name
9293 # 2: Empty
114114
115115 for data in rows[2:]:
116116 donation = '%s = %s (%s %s) %s' % (
117 unicode(data.contents[7].contents[0]).strip(),
118 unicode(data.contents[13].contents[2]).strip(),
119 unicode(data.contents[15].contents[0]).strip(),
120 unicode(data.contents[17].contents[0]).strip(),
121 unicode(data.contents[21].contents[0]).strip())
117 data.contents[7].contents[0].strip(),
118 data.contents[13].contents[2].strip(),
119 data.contents[15].contents[0].strip(),
120 data.contents[17].contents[0].strip(),
121 data.contents[21].contents[0].strip())
122122 print donation
123 #print >>output, donation
123 output.write(donation + '\n')
124124 print
125 #print >>output, ''
125 output.write('\n')
126 output.flush()
127
126128 print
127 #print >>output, ''
129 output.write('\n')