Commit 5360422ba56dd333dd3a3e1a8f7589867078536c
- Diff rendering mode:
- inline
- side by side
gitdb.py
(119 / 0)
|   | |||
| 1 | #!/usr/bin/env python | ||
| 2 | #-*- coding:utf-8 -*- | ||
| 3 | # | ||
| 4 | # Copyright © 2009 Germán Póo-Caamaño <gpoo@gnome.org> | ||
| 5 | # | ||
| 6 | # This program is free software; you can redistribute it and/or modify | ||
| 7 | # it under the terms of the GNU General Public License as published by | ||
| 8 | # the Free Software Foundation; either version 2 of the License, or | ||
| 9 | # (at your option) any later version. | ||
| 10 | # | ||
| 11 | # This program is distributed in the hope that it will be useful, | ||
| 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 14 | # GNU Library General Public License for more details. | ||
| 15 | # | ||
| 16 | # You should have received a copy of the GNU General Public License | ||
| 17 | # along with this program; if not, write to the Free Software | ||
| 18 | # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA | ||
| 19 | """ | ||
| 20 | Handle the git log output and dump the changesets into | ||
| 21 | a CSV file. | ||
| 22 | """ | ||
| 23 | |||
| 24 | import os | ||
| 25 | import sys | ||
| 26 | import subprocess | ||
| 27 | from dataloader import load_modules_from_csv, cmp_version | ||
| 28 | from settings import settings | ||
| 29 | import csv | ||
| 30 | import dbdriver | ||
| 31 | |||
def gitlog2csv(src, dst_prefix, force=False):
    """Run gitdm over one saved git log and return what it prints.

    The gitdm executable and its configuration file are taken from
    ``settings['gitdm']`` and ``settings['config']``.  The file
    ``<dst_prefix>-changesets.csv`` is used as the marker that this log
    has already been processed.

    Parameters:
        src: path of the saved git log; its content is fed to gitdm
            on standard input.
        dst_prefix: value handed to gitdm's ``-p`` option.
        force: when true, run gitdm even if the changesets CSV for
            ``dst_prefix`` already exists.

    Returns:
        The standard output of gitdm, or None when nothing was run
        (the CSV already exists and ``force`` is false, or ``src`` is
        missing or is a directory).
    """
    if not force and os.path.isfile('%s-changesets.csv' % dst_prefix):
        return None

    if not os.path.exists(src) or os.path.isdir(src):
        return None

    # NOTE(review): the command is still a single shell string, so the
    # configured paths and dst_prefix must not contain spaces or shell
    # metacharacters.  Switch to an argument list once it is confirmed
    # that settings['gitdm'] is always a bare executable path.
    command = '%s -c %s -o /dev/null -p %s -s -u' % (
        settings['gitdm'], settings['config'], dst_prefix)

    # Feed the log straight to gitdm instead of piping it through an
    # extra `cat` process, which was never waited on and whose pipe was
    # never closed in the parent.
    log = open(src, 'rb')
    try:
        proc_dm = subprocess.Popen(command, shell=True, stdin=log,
                                   stdout=subprocess.PIPE)
        return proc_dm.communicate()[0]
    finally:
        log.close()
| 49 | |||
def git2csv(releases, modules, input_path, ouput_path, force=False):
    """Convert the saved git logs of every module into CSV files.

    For each repository in ``modules`` and each key of its ``'tags'``
    mapping (sorted with ``cmp_version``, in reverse order), the log
    ``<input_path>/<repo>/<repo>-<version>.log`` is handed to
    gitlog2csv() with ``<ouput_path>/<repo>-<version>`` as destination
    prefix.  The name of each repository is reported on stderr.

    Parameters:
        releases: not used by this function.
        modules: mapping of repository name to a mapping that has a
            ``'tags'`` entry keyed by version.
        input_path: directory holding one sub-directory of logs per
            repository.
        ouput_path: directory for the generated CSV files; it is
            created when it does not exist.
        force: passed through to gitlog2csv().
    """
    if not os.path.exists(ouput_path):
        os.makedirs(ouput_path)

    for (repo, pkg) in modules.iteritems():
        print >> sys.stderr, '%20s:' % repo

        log_path = os.path.join(input_path, repo)
        versions = sorted(pkg['tags'].keys(), cmp_version, reverse=True)

        for cur_version in versions:
            basename = '%s-%s' % (repo, cur_version)
            src = os.path.join(log_path, '%s.log' % basename)
            dst = os.path.join(ouput_path, basename)

            result = gitlog2csv(src, dst, force)

            # NOTE(review): the original `if result:` had no body, which
            # is a syntax error.  Forwarding gitdm's output to stderr is
            # an assumption about the intent -- confirm.
            if result:
                sys.stderr.write(result)
| 71 | |||
| 72 | |||
def _csv2db(path, loader, repo, version):
    """Hand the rows of the CSV file at ``path`` to ``loader``.

    ``loader`` is called as ``loader(reader, repo, version)`` with a
    ``csv.reader`` over the file.  The file is closed as soon as
    ``loader`` returns, so ``loader`` is assumed to consume the reader
    before returning (TODO confirm against dbdriver).

    Returns True when the file exists and was loaded, False otherwise.
    """
    if not os.path.isfile(path):
        return False

    csv_file = open(path)
    try:
        loader(csv.reader(csv_file), repo, version)
    finally:
        csv_file.close()
    return True


def load_db(releases, modules, db_name, db_user,
            db_password='', db_host=None, csv_path=''):
    """Rebuild the database from the CSV files found in ``csv_path``.

    The tables are dropped and created again, then for every repository
    in ``modules`` and every key of its ``'tags'`` mapping (sorted with
    ``cmp_version``, in reverse order) the files
    ``<repo>-<version>-changesets.csv`` and
    ``<repo>-<version>-filetypes.csv`` are loaded when present.
    Progress is reported on stderr.

    Parameters:
        releases: not used by this function.
        modules: mapping of repository name to a mapping that has a
            ``'tags'`` entry keyed by version.
        db_name, db_user, db_password, db_host: passed to
            ``dbdriver.psqldb``.
        csv_path: directory holding the CSV files.  Nothing is done
            (and the database is left untouched) when it does not exist,
            which includes the default empty string.
    """
    # If there is no data directory, there is nothing to process
    if not os.path.exists(csv_path):
        return

    db = dbdriver.psqldb(db_name, db_user, db_password, db_host)
    db.drop_tables()
    db.create_tables()

    for (repo, pkg) in modules.iteritems():
        versions = sorted(pkg['tags'].keys(), cmp_version, reverse=True)
        status = ''

        for version in versions:
            prefix = os.path.join(csv_path, '%s-%s' % (repo, version))

            # '\r' plus the trailing comma keep the progress report on a
            # single, continuously rewritten line.
            print >> sys.stderr, '%-20s:%s→%-6s\r' % (repo, status, version),

            if _csv2db('%s-changesets.csv' % prefix, db.changeset2db,
                       repo, version):
                status += (' %s' % version)

            _csv2db('%s-filetypes.csv' % prefix, db.filetype2db,
                    repo, version)

        print >> sys.stderr, '%-20s:%s %-6s\n' % (repo, status, 'done'),
| 110 | |||
if __name__ == '__main__':
    # Script entry point: turn the saved git logs into CSV files, then
    # load those CSV files into the database.
    db_name = 'mining'
    csv_path = settings['csvoutput']
    log_path = settings['logoutput']  # Now it is the input

    releases, modules = load_modules_from_csv(settings['csvfile'])

    git2csv(releases, modules, log_path, csv_path, force=False)
    # csv_path has to be passed by keyword: the fifth positional
    # parameter of load_db() is db_password, so passing it positionally
    # left csv_path empty and load_db() returned without loading anything.
    load_db(releases, modules, db_name, 'gpoo', csv_path=csv_path)

