#
# The "database".
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-8 LWN.net
# Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
#
12
import sys, datetime
13
14
15
class Hacker:
    """One developer, with every email address and employer list seen for them.

    Attributes:
        name: the name passed to the constructor.
        id: the identifier passed to the constructor.
        email: list of email addresses; ``email[i]`` pairs with
            ``employer[i]``.
        employer: list parallel to ``email``.  Each entry is the employer
            list supplied together with that address and is iterated as
            ``(date, employer)`` pairs.
        added, removed: running sums of ``patch.added`` / ``patch.removed``
            over the patches given to ``addpatch``.
        patches, signoffs, reviews, tested, reports: the patches recorded by
            ``addpatch``, ``addsob``, ``addreview``, ``addtested`` and
            ``addreport`` respectively.
        testcred, repcred: counters bumped by ``testcredit`` and
            ``reportcredit``.
    """

    def __init__(self, name, id, elist, email):
        """Create a hacker known by one address and its employer list."""
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        self.added = self.removed = 0
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        self.testcred = self.repcred = 0

    def addemail(self, email, elist):
        """Attach another address (with its employer list) to this hacker.

        Also registers the address in the module-level ``HackersByEmail``
        table so that it resolves to this hacker.
        """
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer associated with ``email`` at ``date``.

        For each stored address equal to ``email``, the matching employer
        list is scanned in order and the first employer whose date is
        strictly later than ``date`` is returned.  If nothing matches, a
        diagnostic line is printed to standard output and ``None`` is
        returned.
        """
        for addr, elist in zip(self.email, self.employer):
            if addr != email:
                continue
            for edate, empl in elist:
                if edate > date:
                    return empl
        # Single-argument form so the same text is printed on Python 2 and 3.
        print('OOPS.   %s %s %s %s %s' % (self.name, self.employer,
                                          self.email, email, date))
        return None  # Should not happen

    def addpatch(self, patch):
        """Record an authored patch and add its line counts to the totals."""
        self.added += patch.added
        self.removed += patch.removed
        self.patches.append(patch)

    #
    # One near-identical recorder per tag type; the original author noted
    # that there has got to be a better way.
    #
    def addsob(self, patch):
        """Record a patch this hacker signed off on."""
        self.signoffs.append(patch)

    def addreview(self, patch):
        """Record a patch this hacker reviewed."""
        self.reviews.append(patch)

    def addtested(self, patch):
        """Record a patch this hacker tested."""
        self.tested.append(patch)

    def addreport(self, patch):
        """Record a patch credited to a report from this hacker."""
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Bump the report-credit counter; ``patch`` itself is not used."""
        self.repcred += 1

    def testcredit(self, patch):
        """Bump the test-credit counter; ``patch`` itself is not used."""
        self.testcred += 1
64
65
# Lookup tables for stored Hacker objects, keyed by name, by email address
# and by numeric id respectively.
HackersByName = {}
HackersByEmail = {}
HackersByID = {}
# The id that StoreHacker assigns to the next hacker it creates.
MaxID = 0
69
70
def StoreHacker(name, elist, email):
    """Create a Hacker, index it in all three lookup tables and return it.

    The new hacker receives the current value of ``MaxID`` as its id, and
    ``MaxID`` is advanced by one.  Existing entries under the same name or
    email address are overwritten.
    """
    global MaxID

    new_id = MaxID
    MaxID = new_id + 1
    hacker = Hacker(name, new_id, elist, email)
    HackersByID[new_id] = hacker
    HackersByEmail[email] = hacker
    HackersByName[name] = hacker
    return hacker
80
81
def LookupEmail(addr):
    """Return the Hacker registered under email ``addr``, or None if absent."""
    return HackersByEmail.get(addr)
86
87
def LookupName(name):
    """Return the Hacker registered under ``name``, or None if absent."""
    return HackersByName.get(name)
92
93
def LookupID(id):
    """Return the Hacker registered under numeric ``id``, or None if absent."""
    return HackersByID.get(id)
98
99
def AllHackers():
    """Return a new list holding every stored Hacker.

    A real list is returned on both Python 2 and Python 3.  On Python 3 a
    bare ``dict.values()`` is a view, which callers cannot index or sort in
    place.

    NOTE: the original carried a commented-out variant filtering on
    ``(h.added + h.removed) > 0``; no such filtering is done here.
    """
    return list(HackersByID.values())
102
103
def DumpDB():
    """Write a text dump of every hacker to ``database.dump``.

    Hackers are written in sorted name order.  Each gets a summary line
    (id, name, patch count, lines added/removed, signoff count), followed by
    one line per email address and, under each address, one line per
    ``(date, employer)`` entry of the matching employer list.

    The file is opened in the current working directory and overwritten.
    """
    # The context manager closes the file even if a write fails; the
    # original never closed it.
    with open('database.dump', 'w') as out:
        # sorted() works on Python 2 and 3, unlike keys() followed by .sort().
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n'
                      % (h.id, h.name, len(h.patches),
                         h.added, h.removed, len(h.signoffs)))
            # email and employer are parallel lists.
            for addr, elist in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (addr,))
                for date, empl in elist:
                    out.write('\t\t %d-%d-%d %s\n'
                              % (date.year, date.month, date.day, empl.name))
118
119
#
# Employer info: per-employer changeset and signoff totals.
#
122
class Employer:
    """Running totals for a single employer.

    Attributes:
        name: the employer name passed to the constructor.
        added, removed: sums of ``patch.added`` / ``patch.removed`` over the
            changesets given to ``AddCSet``.
        changed: sum of ``max(patch.added, patch.removed)`` per changeset.
        count: number of changesets given to ``AddCSet``.
        sobs: number of ``AddSOB`` calls.
        hackers: distinct ``patch.author`` values, in first-seen order.
    """

    def __init__(self, name):
        """Start an employer with all totals at zero and no hackers."""
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Fold one changeset into the totals and note its author."""
        plus, minus = patch.added, patch.removed
        self.added += plus
        self.removed += minus
        self.changed += max(plus, minus)
        self.count += 1
        author = patch.author
        if author in self.hackers:
            return
        self.hackers.append(author)

    def AddSOB(self):
        """Count one more signoff for this employer."""
        self.sobs += 1
139
140
# Every Employer created so far, keyed by employer name.
Employers = {}
141
142
def GetEmployer(name):
    """Return the Employer called ``name``, creating and storing it if new."""
    if name not in Employers:
        Employers[name] = Employer(name)
    return Employers[name]
149
150
def AllEmployers():
    """Return a new list holding every known Employer.

    A real list is returned on both Python 2 and Python 3.  On Python 3 a
    bare ``dict.values()`` is a view, which callers cannot index or sort in
    place.
    """
    return list(Employers.values())
152
153
#
# The email map: variant addresses and the canonical address each maps to.
#
156
# Maps a variant email address to its canonical form.
EmailAliases = {}
157
158
def AddEmailAlias(variant, canonical):
    """Register ``variant`` as an alias for the ``canonical`` address.

    If ``variant`` already has an alias, a warning is written to standard
    error and the old mapping is then overwritten with the new one.
    """
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
162
163
def RemapEmail(email):
    """Lower-case ``email`` and return its alias target, if one exists.

    When the lower-cased address has no entry in ``EmailAliases``, the
    lower-cased address itself is returned.
    """
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
169
170
#
# Email-to-employer mapping, with an end date for each employer entry.
#
173
# Maps a lower-cased email address (or a domain suffix) to a list of
# (end date, Employer) tuples.
EmailToEmployer = {}
# Default end date for a mapping: 365 days after the day this module loads.
nextyear = datetime.date.today() + datetime.timedelta(365)
175
176
def AddEmailEmployerMapping(email, employer, end=nextyear):
    """Record that ``email`` maps to ``employer`` until ``end``.

    Args:
        email: address (or domain) to map; it is lower-cased before use.
        employer: employer name, resolved through ``GetEmployer``.
        end: end date of the mapping; ``None`` or omitted means ``nextyear``.

    The list stored for an address is kept in ascending end-date order: the
    new ``(end, employer)`` tuple goes in front of the first entry with a
    later date, or at the tail if there is none.  When an existing entry has
    exactly the same end date, a warning is printed to standard output and
    the new entry is still added.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)

    # Only the lookup can legitimately miss, so only it is guarded.
    try:
        history = EmailToEmployer[email]
    except KeyError:
        EmailToEmployer[email] = [(end, empl)]
        return

    for pos, (date, _) in enumerate(history):
        if date == end:  # probably both nextyear
            # Single-argument form prints the same text on Python 2 and 3.
            print('WARNING: duplicate email/empl for %s' % (email,))
        if date > end:
            history.insert(pos, (end, empl))
            return
    history.append((end, empl))
193
194
def MapToEmployer(email, unknown=0):
    """Return the list of ``(end date, Employer)`` tuples for ``email``.

    The address is lower-cased and any `` at `` is turned back into ``@``.
    Lookup order in ``EmailToEmployer``:

    1. the full address;
    2. suffixes of the domain, shortest first.  For ``mail.example.com``
       that is ``example.com`` and then ``mail.example.com``.  A domain
       with a single label is not looked up at all.

    Fallbacks, each a one-entry list ending at ``nextyear``:

    * no ``@`` in the address: a notice is printed to standard output and
      the ``Funky`` employer is used;
    * nothing matched and ``unknown`` is true: the ``(Unknown)`` employer;
    * nothing matched otherwise: an employer named after the normalized
      address itself.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.lower().replace(' at ', '@')
    if email in EmailToEmployer:
        return EmailToEmployer[email]

    namedom = email.split('@')
    if len(namedom) < 2:
        # Single-argument form prints the same text on Python 2 and 3.
        print('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer('Funky'))]

    labels = namedom[1].split('.')
    # Start from the last two labels and widen towards the full domain.
    for dots in range(len(labels) - 2, -1, -1):
        addr = '.'.join(labels[dots:])
        if addr in EmailToEmployer:
            return EmailToEmployer[addr]

    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
215
216
217
def LookupEmployer(email, mapunknown=0):
    """Return what ``MapToEmployer(email, mapunknown)`` returns, unchanged.

    The result is passed through as-is; no further employer lookup is
    applied to it.
    """
    return MapToEmployer(email, mapunknown)