1
#!/usr/bin/python
2
#
3
# Create a graph of patch flow into the mainline.
4
#
5
# This code is part of the LWN git data miner.
6
#
7
# Copyright 2007-8 LWN.net
8
# Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
9
#
10
# This file may be distributed under the terms of the GNU General
11
# Public License, version 2.
12
#
13
import sys
14
import patterns
15
16
#
17
# The various types of commit we understand.
18
#
19
class Commit:
20
    def __init__(self, id, parent):
21
        self.id = id
22
        self.parent = parent
23
        self.ismerge = 0
24
        self.treepriority = 0
25
#
26
# Merges are special 
27
#
28
class Merge (Commit):
29
    def __init__(self, id, parent):
30
        Commit.__init__(self, id, parent)
31
        self.ismerge = 1
32
        self.internal = 1 # Two branches within a repo?
33
        self.parents = [ parent ]
34
        
35
    def addparent(self, parentid):
36
        self.parents.append(parentid)
37
38
    def addtree(self, tree):
39
        self.tree = tree
40
        self.internal = 0
41
42
#
43
# Trees: where the commits come from.
44
#
45
class Tree:
46
    def __init__(self, name, url):
47
        self.name = name
48
        self.url = url
49
        self.inputs = [ ]
50
        self.commits = [ ]
51
52
    def addcommit(self, id):
53
        self.commits.append(id)
54
55
    def addinput(self, tree):
56
        if tree not in self.inputs:
57
            self.inputs.append(tree)
58
            # print '%s -> %s' % (tree.name, self.name)
59
60
Mainline = Tree('Mainline',
61
                'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git')
62
KnownTrees = { Mainline.url: Mainline }
63
64
def NormalizeURL(url):
65
    if url[:4] == 'git:':
66
        return url
67
    if url == '../net-2.6/':
68
        url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6'
69
    url = url.replace('master.kernel.org:', 'git://git.kernel.org')
70
    if url[-18:] == 'torvalds/linux-2.6':
71
        url += '.git'
72
    if url[:8] == '/pub/scm':
73
        url = 'git://git.kernel.org' + url
74
    return url
75
76
def LookupTree(url):
77
    url = NormalizeURL(url)
78
    try:
79
        return KnownTrees[url]
80
    except KeyError:
81
        tree = Tree(url, url)
82
        KnownTrees[url] = tree
83
        return tree
84
85
#
86
# We track which tree every commit belongs to.
87
#
88
CommitTrees = { }
89
class CTEntry:
90
    def __init__ (self, tree, priority, path):
91
        self.tree = tree
92
        self.priority = priority
93
        self.path = path
94
95
def AddCommitTree(id, entry):
96
#    print 'add: ', id, '[',
97
#    for tree in entry.path:
98
#        print tree.name,
99
#    print ']'
100
    try:
101
        oldentry = CommitTrees[id]
102
        if entry.priority < oldentry.priority:
103
            CommitTrees[id] = entry
104
    except KeyError:
105
        CommitTrees[id] = entry
106
107
108
def LookupCommitTree(id):
109
    try:
110
        return CommitTrees[id]
111
    except KeyError:
112
        print 'Unfound commit %s' % (id)
113
        return CTEntry (Mainline, 0, [])
114
115
#
116
# Input handling with one-line pushback.
117
#
118
SavedLine = None
119
Input = sys.stdin
120
121
def GetLine():
122
    global SavedLine
123
    if SavedLine:
124
        ret = SavedLine
125
        SavedLine = None
126
        return ret
127
    return Input.readline()
128
129
def SaveLine(line):
130
    global SavedLine
131
    SavedLine = line
132
133
#
134
# Pull in a commit and see what it is.
135
#
136
def GetCommit():
137
    #
138
    # Skip junk up to the next commit.
139
    #
140
    while 1:
141
        line = GetLine()
142
        if not line:
143
            return None
144
        m = patterns.Pcommit.match(line)
145
        if m:
146
            break
147
148
    #
149
    # Look at the commit and see how many parents we have.
150
    #
151
    ids = m.group(1).split()
152
    if len(ids) <= 1:
153
        if len(CommitTrees.values()) > 0:
154
            print 'No-Parent commit:', ids[0]
155
            return GetCommit()
156
        print 'Did you run git with --parents?'
157
        print ids
158
        sys.exit(1)
159
    if len(ids) == 2:  # Simple commit
160
        return Commit(ids[0], ids[1])
161
    #
162
    # OK, we have a merge.
163
    #
164
    merge = Merge(ids[0], ids[1])
165
    for id in ids[2:]:
166
        merge.addparent(id)
167
    #
168
    # We need to figure out what kind of merge it is, so read through the
169
    # descriptive text to the merge line.
170
    #
171
    while 1:
172
        line = GetLine()
173
        if not line:
174
            print 'EOF looking for merge line'
175
            return None
176
        #
177
        # Maybe it's an external merge?
178
        #
179
        m = patterns.PExtMerge.match(line)
180
        if m:
181
            merge.addtree(LookupTree(m.group(2)))
182
            return merge
183
        #
184
        # OK, maybe it's internal
185
        #
186
        if patterns.PIntMerge.match(line) or patterns.PIntMerge2.match(line):
187
            #print 'Internal:', line[:-1]
188
            merge.internal = 1
189
            return merge
190
        m = patterns.Pcommit.match(line)
191
        if m:
192
            print 'Hit next commit (%s) looking for merge line' % (m.group(1))
193
            SaveLine(line)
194
            return GetCommit()
195
196
#
197
# Print out a tree and its inputs
198
#
199
def PrintTree(tree, indent = ''):
200
    print '%s%4d %s' % (indent, len(tree.commits), tree.name)
201
    for input in tree.inputs:
202
        PrintTree(input, indent + '    ')
203
204
#
205
# Let's try to build a data structure giving the patch flows.
206
#
207
class FlowNode:
208
    def __init__(self, tree):
209
        self.tree = tree
210
        self.inputs = { }
211
        self.commits = 0
212
213
def BuildFlowTree():
214
    rootnode = FlowNode(Mainline)
215
    notree = Tree('[No tree]', '')
216
    for centry in CommitTrees.values():
217
        path = centry.path
218
        if not path:
219
            path = [ notree ]
220
        FillFlowPath(path, rootnode)
221
    return rootnode
222
223
def FillFlowPath(path, node):
224
    node.commits += 1
225
    if len(path) == 0:
226
        return
227
    next, rest = path[0], path[1:]
228
    try:
229
        nextnode = node.inputs[next.name]
230
    except KeyError:
231
        nextnode = node.inputs[next.name] = FlowNode(next)
232
    return FillFlowPath(rest, nextnode)
233
234
def PrintFlowTree(ftree, indent = ''):
235
    print '%s%3d %s' % (indent, ftree.commits, ftree.tree.name)
236
    inputs = ftree.inputs.values()
237
    inputs.sort(GVSort)
238
    for input in inputs:
239
        PrintFlowTree(input, indent + '  ')
240
241
#
242
# Something for graphviz
243
#
244
GVHeader = '''digraph "runtree" {
245
graph [ label = "Patch flow into the mainline",
246
        concentrate = true,
247
        nodesep = 0.1,
248
        rankdir = LR ];
249
node [shape = polygon,
250
      sides = 4,
251
      height = 0.3
252
      fontsize = 8];
253
'''
254
255
256
MainlineCommits = 0
257
258
def GVTree(ftree):
259
    global MainlineCommits
260
    MainlineCommits = ftree.commits
261
    gvf = open('runtree.gv', 'w')
262
    gvf.write(GVHeader)
263
    inputs = ftree.inputs.values()
264
    inputs.sort(GVSort)
265
    for input in inputs:
266
        GVPrintNode(gvf, input, 'Mainline')
267
    gvf.write('}\n')
268
269
def GVNodeName(treename):
270
    sname = treename.split('/')
271
    if treename.find('kernel.org') >= 0:
272
        return '%s/%s' % (sname[-2], sname[-1])
273
    sep = treename.find ('://')
274
    if sep > 0:
275
        return treename[sep+3:]
276
    return treename
277
278
def GVSort(n1, n2):
279
    return n2.commits - n1.commits
280
281
def GVPrintNode(gvf, node, parent):
282
    name = GVNodeName(node.tree.name)
283
    gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits))
284
    gvf.write (', arrowsize = 0.5')
285
    if MainlineCommits/node.commits < 20:
286
        gvf.write(', color = red')
287
    elif MainlineCommits/node.commits < 100:
288
        gvf.write(', color = orange');
289
    gvf.write(']\n')
290
    inputs = node.inputs.values()
291
    if inputs:
292
        inputs.sort(GVSort)
293
        for input in inputs:
294
            GVPrintNode(gvf, input, name)
295
296
#
297
# Main code.
298
#
299
commit = GetCommit()
300
ncommits = 0
301
while commit:
302
    ncommits += 1
303
    entry = LookupCommitTree(commit.id)
304
    tree = entry.tree
305
    priority = entry.priority
306
    tree.addcommit(commit.id)
307
    #
308
    # For regular commits, just remember the tree involved
309
    #
310
    if not commit.ismerge:
311
        AddCommitTree(commit.parent, entry)
312
    #
313
    # For merges we have to deal with all the parents.
314
    #
315
    else:
316
        AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path))
317
        if commit.internal:
318
            for p in commit.parents[1:]:
319
                path = entry.path + [tree]
320
                AddCommitTree(p, CTEntry (tree, priority, entry.path))
321
        else:
322
            for p in commit.parents[1:]:
323
                path = entry.path + [commit.tree]
324
                AddCommitTree(p, CTEntry (commit.tree, priority + 1,  path))
325
                if commit.tree is not Mainline:
326
                    tree.addinput(commit.tree)
327
    commit = GetCommit()
328
        
329
#PrintTree(Mainline)
330
ftree = BuildFlowTree()
331
PrintFlowTree(ftree)
332
GVTree(ftree)
333
print '%d commits total, %d trees' % (MainlineCommits, len (KnownTrees.keys()))