| 1 |
#!/usr/bin/python |
| 2 |
# |
| 3 |
# Create a graph of patch flow into the mainline. |
| 4 |
# |
| 5 |
# This code is part of the LWN git data miner. |
| 6 |
# |
| 7 |
# Copyright 2007-8 LWN.net |
| 8 |
# Copyright 2007-8 Jonathan Corbet <corbet@lwn.net> |
| 9 |
# |
| 10 |
# This file may be distributed under the terms of the GNU General |
| 11 |
# Public License, version 2. |
| 12 |
# |
| 13 |
import sys |
| 14 |
import patterns |
| 15 |
|
| 16 |
# |
| 17 |
# The various types of commit we understand. |
| 18 |
# |
| 19 |
class Commit: |
| 20 |
def __init__(self, id, parent): |
| 21 |
self.id = id |
| 22 |
self.parent = parent |
| 23 |
self.ismerge = 0 |
| 24 |
self.treepriority = 0 |
| 25 |
# |
| 26 |
# Merges are special |
| 27 |
# |
| 28 |
class Merge (Commit): |
| 29 |
def __init__(self, id, parent): |
| 30 |
Commit.__init__(self, id, parent) |
| 31 |
self.ismerge = 1 |
| 32 |
self.internal = 1 # Two branches within a repo? |
| 33 |
self.parents = [ parent ] |
| 34 |
|
| 35 |
def addparent(self, parentid): |
| 36 |
self.parents.append(parentid) |
| 37 |
|
| 38 |
def addtree(self, tree): |
| 39 |
self.tree = tree |
| 40 |
self.internal = 0 |
| 41 |
|
| 42 |
# |
| 43 |
# Trees: where the commits come from. |
| 44 |
# |
| 45 |
class Tree: |
| 46 |
def __init__(self, name, url): |
| 47 |
self.name = name |
| 48 |
self.url = url |
| 49 |
self.inputs = [ ] |
| 50 |
self.commits = [ ] |
| 51 |
|
| 52 |
def addcommit(self, id): |
| 53 |
self.commits.append(id) |
| 54 |
|
| 55 |
def addinput(self, tree): |
| 56 |
if tree not in self.inputs: |
| 57 |
self.inputs.append(tree) |
| 58 |
# print '%s -> %s' % (tree.name, self.name) |
| 59 |
|
| 60 |
Mainline = Tree('Mainline', |
| 61 |
'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git') |
| 62 |
KnownTrees = { Mainline.url: Mainline } |
| 63 |
|
| 64 |
def NormalizeURL(url): |
| 65 |
if url[:4] == 'git:': |
| 66 |
return url |
| 67 |
if url == '../net-2.6/': |
| 68 |
url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6' |
| 69 |
url = url.replace('master.kernel.org:', 'git://git.kernel.org') |
| 70 |
if url[-18:] == 'torvalds/linux-2.6': |
| 71 |
url += '.git' |
| 72 |
if url[:8] == '/pub/scm': |
| 73 |
url = 'git://git.kernel.org' + url |
| 74 |
return url |
| 75 |
|
| 76 |
def LookupTree(url): |
| 77 |
url = NormalizeURL(url) |
| 78 |
try: |
| 79 |
return KnownTrees[url] |
| 80 |
except KeyError: |
| 81 |
tree = Tree(url, url) |
| 82 |
KnownTrees[url] = tree |
| 83 |
return tree |
| 84 |
|
| 85 |
# |
| 86 |
# We track which tree every commit belongs to. |
| 87 |
# |
| 88 |
CommitTrees = { } |
| 89 |
class CTEntry: |
| 90 |
def __init__ (self, tree, priority, path): |
| 91 |
self.tree = tree |
| 92 |
self.priority = priority |
| 93 |
self.path = path |
| 94 |
|
| 95 |
def AddCommitTree(id, entry): |
| 96 |
# print 'add: ', id, '[', |
| 97 |
# for tree in entry.path: |
| 98 |
# print tree.name, |
| 99 |
# print ']' |
| 100 |
try: |
| 101 |
oldentry = CommitTrees[id] |
| 102 |
if entry.priority < oldentry.priority: |
| 103 |
CommitTrees[id] = entry |
| 104 |
except KeyError: |
| 105 |
CommitTrees[id] = entry |
| 106 |
|
| 107 |
|
| 108 |
def LookupCommitTree(id): |
| 109 |
try: |
| 110 |
return CommitTrees[id] |
| 111 |
except KeyError: |
| 112 |
print 'Unfound commit %s' % (id) |
| 113 |
return CTEntry (Mainline, 0, []) |
| 114 |
|
| 115 |
# |
| 116 |
# Input handling with one-line pushback. |
| 117 |
# |
| 118 |
SavedLine = None |
| 119 |
Input = sys.stdin |
| 120 |
|
| 121 |
def GetLine(): |
| 122 |
global SavedLine |
| 123 |
if SavedLine: |
| 124 |
ret = SavedLine |
| 125 |
SavedLine = None |
| 126 |
return ret |
| 127 |
return Input.readline() |
| 128 |
|
| 129 |
def SaveLine(line): |
| 130 |
global SavedLine |
| 131 |
SavedLine = line |
| 132 |
|
| 133 |
# |
| 134 |
# Pull in a commit and see what it is. |
| 135 |
# |
| 136 |
def GetCommit(): |
| 137 |
# |
| 138 |
# Skip junk up to the next commit. |
| 139 |
# |
| 140 |
while 1: |
| 141 |
line = GetLine() |
| 142 |
if not line: |
| 143 |
return None |
| 144 |
m = patterns.Pcommit.match(line) |
| 145 |
if m: |
| 146 |
break |
| 147 |
|
| 148 |
# |
| 149 |
# Look at the commit and see how many parents we have. |
| 150 |
# |
| 151 |
ids = m.group(1).split() |
| 152 |
if len(ids) <= 1: |
| 153 |
if len(CommitTrees.values()) > 0: |
| 154 |
print 'No-Parent commit:', ids[0] |
| 155 |
return GetCommit() |
| 156 |
print 'Did you run git with --parents?' |
| 157 |
print ids |
| 158 |
sys.exit(1) |
| 159 |
if len(ids) == 2: # Simple commit |
| 160 |
return Commit(ids[0], ids[1]) |
| 161 |
# |
| 162 |
# OK, we have a merge. |
| 163 |
# |
| 164 |
merge = Merge(ids[0], ids[1]) |
| 165 |
for id in ids[2:]: |
| 166 |
merge.addparent(id) |
| 167 |
# |
| 168 |
# We need to figure out what kind of merge it is, so read through the |
| 169 |
# descriptive text to the merge line. |
| 170 |
# |
| 171 |
while 1: |
| 172 |
line = GetLine() |
| 173 |
if not line: |
| 174 |
print 'EOF looking for merge line' |
| 175 |
return None |
| 176 |
# |
| 177 |
# Maybe it's an external merge? |
| 178 |
# |
| 179 |
m = patterns.PExtMerge.match(line) |
| 180 |
if m: |
| 181 |
merge.addtree(LookupTree(m.group(2))) |
| 182 |
return merge |
| 183 |
# |
| 184 |
# OK, maybe it's internal |
| 185 |
# |
| 186 |
if patterns.PIntMerge.match(line) or patterns.PIntMerge2.match(line): |
| 187 |
#print 'Internal:', line[:-1] |
| 188 |
merge.internal = 1 |
| 189 |
return merge |
| 190 |
m = patterns.Pcommit.match(line) |
| 191 |
if m: |
| 192 |
print 'Hit next commit (%s) looking for merge line' % (m.group(1)) |
| 193 |
SaveLine(line) |
| 194 |
return GetCommit() |
| 195 |
|
| 196 |
# |
| 197 |
# Print out a tree and its inputs |
| 198 |
# |
| 199 |
def PrintTree(tree, indent = ''): |
| 200 |
print '%s%4d %s' % (indent, len(tree.commits), tree.name) |
| 201 |
for input in tree.inputs: |
| 202 |
PrintTree(input, indent + ' ') |
| 203 |
|
| 204 |
# |
| 205 |
# Let's try to build a data structure giving the patch flows. |
| 206 |
# |
| 207 |
class FlowNode: |
| 208 |
def __init__(self, tree): |
| 209 |
self.tree = tree |
| 210 |
self.inputs = { } |
| 211 |
self.commits = 0 |
| 212 |
|
| 213 |
def BuildFlowTree(): |
| 214 |
rootnode = FlowNode(Mainline) |
| 215 |
notree = Tree('[No tree]', '') |
| 216 |
for centry in CommitTrees.values(): |
| 217 |
path = centry.path |
| 218 |
if not path: |
| 219 |
path = [ notree ] |
| 220 |
FillFlowPath(path, rootnode) |
| 221 |
return rootnode |
| 222 |
|
| 223 |
def FillFlowPath(path, node): |
| 224 |
node.commits += 1 |
| 225 |
if len(path) == 0: |
| 226 |
return |
| 227 |
next, rest = path[0], path[1:] |
| 228 |
try: |
| 229 |
nextnode = node.inputs[next.name] |
| 230 |
except KeyError: |
| 231 |
nextnode = node.inputs[next.name] = FlowNode(next) |
| 232 |
return FillFlowPath(rest, nextnode) |
| 233 |
|
| 234 |
def PrintFlowTree(ftree, indent = ''): |
| 235 |
print '%s%3d %s' % (indent, ftree.commits, ftree.tree.name) |
| 236 |
inputs = ftree.inputs.values() |
| 237 |
inputs.sort(GVSort) |
| 238 |
for input in inputs: |
| 239 |
PrintFlowTree(input, indent + ' ') |
| 240 |
|
| 241 |
# |
| 242 |
# Something for graphviz |
| 243 |
# |
| 244 |
GVHeader = '''digraph "runtree" { |
| 245 |
graph [ label = "Patch flow into the mainline", |
| 246 |
concentrate = true, |
| 247 |
nodesep = 0.1, |
| 248 |
rankdir = LR ]; |
| 249 |
node [shape = polygon, |
| 250 |
sides = 4, |
| 251 |
height = 0.3 |
| 252 |
fontsize = 8]; |
| 253 |
''' |
| 254 |
|
| 255 |
|
| 256 |
MainlineCommits = 0 |
| 257 |
|
| 258 |
def GVTree(ftree): |
| 259 |
global MainlineCommits |
| 260 |
MainlineCommits = ftree.commits |
| 261 |
gvf = open('runtree.gv', 'w') |
| 262 |
gvf.write(GVHeader) |
| 263 |
inputs = ftree.inputs.values() |
| 264 |
inputs.sort(GVSort) |
| 265 |
for input in inputs: |
| 266 |
GVPrintNode(gvf, input, 'Mainline') |
| 267 |
gvf.write('}\n') |
| 268 |
|
| 269 |
def GVNodeName(treename): |
| 270 |
sname = treename.split('/') |
| 271 |
if treename.find('kernel.org') >= 0: |
| 272 |
return '%s/%s' % (sname[-2], sname[-1]) |
| 273 |
sep = treename.find ('://') |
| 274 |
if sep > 0: |
| 275 |
return treename[sep+3:] |
| 276 |
return treename |
| 277 |
|
| 278 |
def GVSort(n1, n2): |
| 279 |
return n2.commits - n1.commits |
| 280 |
|
| 281 |
def GVPrintNode(gvf, node, parent): |
| 282 |
name = GVNodeName(node.tree.name) |
| 283 |
gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits)) |
| 284 |
gvf.write (', arrowsize = 0.5') |
| 285 |
if MainlineCommits/node.commits < 20: |
| 286 |
gvf.write(', color = red') |
| 287 |
elif MainlineCommits/node.commits < 100: |
| 288 |
gvf.write(', color = orange'); |
| 289 |
gvf.write(']\n') |
| 290 |
inputs = node.inputs.values() |
| 291 |
if inputs: |
| 292 |
inputs.sort(GVSort) |
| 293 |
for input in inputs: |
| 294 |
GVPrintNode(gvf, input, name) |
| 295 |
|
| 296 |
# |
| 297 |
# Main code. |
| 298 |
# |
| 299 |
commit = GetCommit() |
| 300 |
ncommits = 0 |
| 301 |
while commit: |
| 302 |
ncommits += 1 |
| 303 |
entry = LookupCommitTree(commit.id) |
| 304 |
tree = entry.tree |
| 305 |
priority = entry.priority |
| 306 |
tree.addcommit(commit.id) |
| 307 |
# |
| 308 |
# For regular commits, just remember the tree involved |
| 309 |
# |
| 310 |
if not commit.ismerge: |
| 311 |
AddCommitTree(commit.parent, entry) |
| 312 |
# |
| 313 |
# For merges we have to deal with all the parents. |
| 314 |
# |
| 315 |
else: |
| 316 |
AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path)) |
| 317 |
if commit.internal: |
| 318 |
for p in commit.parents[1:]: |
| 319 |
path = entry.path + [tree] |
| 320 |
AddCommitTree(p, CTEntry (tree, priority, entry.path)) |
| 321 |
else: |
| 322 |
for p in commit.parents[1:]: |
| 323 |
path = entry.path + [commit.tree] |
| 324 |
AddCommitTree(p, CTEntry (commit.tree, priority + 1, path)) |
| 325 |
if commit.tree is not Mainline: |
| 326 |
tree.addinput(commit.tree) |
| 327 |
commit = GetCommit() |
| 328 |
|
| 329 |
#PrintTree(Mainline) |
| 330 |
ftree = BuildFlowTree() |
| 331 |
PrintFlowTree(ftree) |
| 332 |
GVTree(ftree) |
| 333 |
print '%d commits total, %d trees' % (MainlineCommits, len (KnownTrees.keys())) |