Commit a891144f1ac7d3c3ee8105169f954bf65aa1c02f
- Diff rendering mode:
- inline
- side by side
failburn.py
(12 / 18)
|   | |||
| 9 | 9 | ||
| 10 | 10 | # fake user-agent | |
| 11 | 11 | headers = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; ' \ | |
| 12 | 'rv:1.9.0.2) Gecko/2008092313 Ubuntu/8.04 (hardy) Firefox/3.1.6'} | ||
| 12 | 'rv:1.9.0.2) Gecko/2008092313 Ubuntu/8.04 (hardy) Firefox/3.1.6'} | ||
| 13 | 13 | ||
| 14 | 14 | #sources = ['Inidoneas', 'Suspensas'] | |
| 15 | 15 | sources = ['Testing'] | |
| 16 | 16 | ||
| 17 | def _donations(cnpj, name): | ||
| 18 | """Search for donations from the CNPJ/Name""" | ||
| 19 | |||
| 20 | 17 | for source in sources: | |
| 21 | 18 | print source + ':' | |
| 22 | 19 | content = csv.reader(open(source + '.csv'), delimiter=';') | |
| 23 | 20 | ||
| 24 | # little dirty trick to skip the first row (first next() gets the row, | ||
| 25 | # second moves to the second row, so the for starts there.) | ||
| 21 | # The data have two headers | ||
| 26 | 22 | content.next() | |
| 27 | 23 | content.next() | |
| 28 | 24 | ||
| 29 | for row in content: # skip the headers | ||
| 25 | for row in content: | ||
| 30 | 26 | print row[0], row[1].decode('iso-8859-1'), | |
| 31 | 27 | ||
| 32 | 28 | # remove ".", "/" and "-". Do not translate any other characters. | |
| … | … | ||
| 34 | 34 | continue | |
| 35 | 35 | ||
| 36 | 36 | form = { | |
| 37 | 'sgUe': '', | ||
| 38 | 'acao': 'Resumo', | ||
| 37 | 'acao': 'resumo', | ||
| 38 | 'cdCpfCnpjDoador': cnpj_clean, | ||
| 39 | 'cdEspRecurso': '-1', # any | ||
| 40 | 'dsCargo': '', | ||
| 41 | 'municipio': '', | ||
| 42 | 'nmCandidato': '', | ||
| 39 | 43 | 'nomeFornecDoador': 'Doador', | |
| 44 | 'nrCand': '', | ||
| 40 | 45 | 'rdTipo': 'receita', | |
| 41 | 'nmDoador': row[1], | ||
| 42 | 'cdCpfCnpjDoador': cnpj_clean, | ||
| 43 | 'cdEspRecurso': '-1' # any | ||
| 46 | 'sgPartido': '', | ||
| 47 | 'sgUe': '', | ||
| 44 | 48 | } | |
| 45 | 49 | ||
| 46 | 50 | request = urllib2.Request(url='http://www4.tse.gov.br/' \ | |
| … | … | ||
| 52 | 52 | 'consultaReceitaDespesaCandidatoServlet.do') | |
| 53 | 53 | ||
| 54 | 54 | for key in headers: | |
| 55 | #print 'Header', key, '=', headers[key] | ||
| 56 | 55 | request.add_header(key, headers[key]) | |
| 57 | 56 | ||
| 58 | 57 | body = urllib.urlencode(form) | |
| 59 | #print body | ||
| 60 | |||
| 61 | 58 | request.add_data(body) | |
| 62 | 59 | ||
| 63 | 60 | response = urllib2.urlopen(request) | |
| … | … | ||
| 66 | 66 | output.close() | |
| 67 | 67 | ||
| 68 | 68 | print ' --> done' | |
| 69 | |||
| 70 | time.sleep(30) # So we don't "burn" the server. | ||
| 71 | |||
| 72 | 69 | ||
| 73 | 70 |

