Added new dump to CSV
[mining-tools:gitdm.git] / csvdump.py
1 #
2 # aggregate per-month statistics for people
3 #
4 import sys, datetime
5 import csv
6
class CSVStat:
    """Running added/removed line totals for one author in one period.

    Instances carry the author name, e-mail, employer and period date
    supplied at construction time; the two counters start at zero and grow
    through accumulate().
    """

    def __init__ (self, name, email, employer, date):
        # Identity of the row this record will become.
        self.name, self.email = name, email
        self.employer, self.date = employer, date
        # Counters, filled in by accumulate().
        self.added = 0
        self.removed = 0

    def accumulate (self, p):
        """Add the `added` and `removed` counts of `p` to the totals."""
        self.added += p.added
        self.removed += p.removed
17
# Maps "<author name>-<YYYY>-<MM>-01" to the CSVStat collecting that
# author's totals for that month.
PeriodCommitHash = { }

def AccumulatePatch (p):
    """Fold patch `p` into its author's statistics for the patch's month.

    The bucket is keyed by the author name and the first day of the month
    of p.date.  A new CSVStat is created the first time a key is seen,
    using the employer reported by p.author.emailemployer() for that patch.
    """
    month_start = "%.2d-%.2d-01"%(p.date.year, p.date.month)
    key = "%s-%s"%(p.author.name, month_start)
    bucket = PeriodCommitHash.get (key)
    if bucket is None:
        # First patch seen for this author in this month.
        employer = p.author.emailemployer (p.email, p.date)
        bucket = CSVStat (p.author.name, p.email, employer, month_start)
        PeriodCommitHash[key] = bucket
    bucket.accumulate (p)
30
# Rows collected by store_patch(): one per non-merge patch in ChangeSets,
# one per (patch, file type) pair in FileTypes.
ChangeSets = []
FileTypes = []

def store_patch(patch):
    """Record one non-merge patch in ChangeSets and FileTypes.

    Merge patches are ignored.  For every other patch a row
    [commit, date, email, domain, author, employer, added, removed] is
    appended to ChangeSets, and one [commit, filetype, added, removed] row
    per entry of patch.filetypes is appended to FileTypes.

    Double quotes and backslashes in the author and employer names are
    replaced by '.', as are single quotes in the author name.
    """
    if patch.merge:
        return

    employer = patch.author.emailemployer(patch.email, patch.date)
    employer = employer.name.replace('"', '.').replace('\\', '.')
    author = patch.author.name.replace('"', '.').replace('\\', '.')
    # Continue from the already sanitised value; restarting from
    # patch.author.name would throw away the replacements made above.
    author = author.replace("'", '.')

    try:
        domain = patch.email.split('@')[1]
    except (AttributeError, IndexError):
        # No '@' in the address (or no usable address at all): fall back
        # to the raw value instead of failing.
        domain = patch.email

    ChangeSets.append([patch.commit, str(patch.date),
                       patch.email, domain, author, employer,
                       patch.added, patch.removed])
    # items() rather than the Python 2 only iteritems().
    for filetype, (added, removed) in patch.filetypes.items():
        FileTypes.append([patch.commit, filetype, added, removed])
49
50
def save_csv (prefix='data'):
    """Write the collected rows to '<prefix>-changesets.csv' and
    '<prefix>-filetypes.csv'.

    A file is only created when the corresponding list (ChangeSets or
    FileTypes) is non-empty.  Non-numeric fields are quoted.

    The change-set header lists 'Email' before 'Domain' because that is
    the order in which store_patch() builds each row.  The 'Affliation'
    spelling is kept unchanged so existing consumers of the column name
    keep working.
    """
    # Dump the ChangeSets
    if ChangeSets:
        # The with-block closes the file even if a row fails to serialise.
        with open('%s-changesets.csv' % prefix, 'w') as fd:
            writer = csv.writer (fd, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow (['Commit', 'Date', 'Email',
                              'Domain', 'Name', 'Affliation',
                              'Added', 'Removed'])
            writer.writerows (ChangeSets)

    # Dump the file types
    if FileTypes:
        with open('%s-filetypes.csv' % prefix, 'w') as fd:
            writer = csv.writer (fd, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow (['Commit', 'Type', 'Added', 'Removed'])
            writer.writerows (FileTypes)
70
71
72
def OutputCSV (file):
    """Write one CSV row per PeriodCommitHash entry to `file`.

    Does nothing when `file` is None.  Otherwise a header row is written,
    followed by name, e-mail, employer name, date and the added/removed
    totals of every stored entry; non-numeric fields are quoted.
    """
    if file is not None:
        def _dotted (text):
            # sanitise names " is common and \" sometimes too
            return text.replace ('"', '.').replace ('\\', '.')

        out = csv.writer (file, quoting=csv.QUOTE_NONNUMERIC)
        out.writerow (['Name', 'Email', 'Affliation', 'Date',
                       'Added', 'Removed'])
        for entry in PeriodCommitHash.values():
            employer = _dotted (entry.employer.name)
            author = _dotted (entry.name)
            out.writerow ([author, entry.email, employer, entry.date,
                           entry.added, entry.removed])
85
# Names exported by a star-import of this module.  save_csv is listed
# alongside store_patch because it writes out the rows store_patch collects.
__all__ = [  'AccumulatePatch', 'OutputCSV', 'store_patch', 'save_csv' ]