Commit 351ffe7256225a953497fd29d368b7212617dd31

Retrieving information, but saving it to disk (not sure about the format of the results yet)
failburn.py
(56 / 1)
  
11#!/usr/bin/python
2#
23
34import csv
5import urllib
6import urllib2
7import time
48
9# fake user-agent
10headers = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; ' \
11 'rv:1.9.0.2) Gecko/2008092313 Ubuntu/8.04 (hardy) Firefox/3.1.6'}
12
513sources = ['Inidoneas', 'Suspensas']
614
15def _donations(cnpj, name):
16 """Search for donations from the CNPJ/Name"""
17
718for source in sources:
819 print source + ':'
920 content = csv.reader(open(source + '.csv'), delimiter=';')
21
22 # little dirty trick to skip the first row (first next() gets the row,
23 # second moves to the second row, so the for starts there.)
1024 content.next()
25 content.next()
1126
1227 for row in content: # skip the headers
13 print row[0], row[1].decode('iso-8859-1')
28 print row[0], row[1].decode('iso-8859-1'),
29
30 # remove ".", "/" and "-". Do not translate any other characters.
31 cnpj_clean = row[0].translate(None, './-')
32
33 form = {
34 'sgUe': '',
35 'acao': 'Resumo',
36 'nomeFornecDoador': 'Doador',
37 'rdTipo': 'receita',
38 'nmDoador': row[1],
39 'cdCpfCnpjDoador': cnpj_clean,
40 'cdEspRecurso': '-1' # any
41 }
42
43 request = urllib2.Request(url='http://www4.tse.gov.br/' \
44 'spce2008ConsultaFinanciamento/' \
45 'consultaReceitaDespesaCandidatoServlet.do')
46
47 for key in headers:
48 #print 'Header', key, '=', headers[key]
49 request.add_header(key, headers[key])
50
51 body = urllib.urlencode(form)
52 #print body
53
54 request.add_data(body)
55
56 response = urllib2.urlopen(request)
57 data = response.read()
58 response.close()
59
60 name = cnpj_clean + '.result'
61 output = file(name, 'w')
62 output.write(data)
63 output.close()
64
65 print ' --> done'
66
67 time.sleep(5) # So we don't "burn" the server.
68
1469
1570 print