Browse Source

Instruction-level granularity in callgrind output

When generating callgrind format output, produce cost lines at
instruction granularity. This allows visualizers supporting the
callgrind format to display instruction-level profiling information.

We also need to provide the object file (ob=) in order for tools to find
the object file to disassemble when displaying assembly.

We opportunistically group cost lines corressponding to the same
function together, reducing the number of superfluous description lines.
Subposition compression (relative position numbering) is also used to
reduce the output size.
Michael Pratt 8 years ago
parent
commit
77bbb10981

+ 4
- 1
internal/driver/driver.go View File

130
 	case "proto", "raw":
130
 	case "proto", "raw":
131
 		trim, focus, tagfocus, hide = false, false, false, false
131
 		trim, focus, tagfocus, hide = false, false, false, false
132
 		v.set("addresses", "t")
132
 		v.set("addresses", "t")
133
+	case "callgrind", "kcachegrind":
134
+		trim = false
135
+		v.set("addresses", "t")
133
 	case "disasm", "weblist":
136
 	case "disasm", "weblist":
134
 		trim = false
137
 		trim = false
135
 		v.set("addressnoinlines", "t")
138
 		v.set("addressnoinlines", "t")
136
 	case "peek":
139
 	case "peek":
137
 		trim, focus, hide = false, false, false
140
 		trim, focus, hide = false, false, false
138
-	case "kcachegrind", "callgrind", "list":
141
+	case "list":
139
 		v.set("nodecount", "0")
142
 		v.set("nodecount", "0")
140
 		v.set("lines", "t")
143
 		v.set("lines", "t")
141
 	case "text", "top", "topproto":
144
 	case "text", "top", "topproto":

+ 12
- 3
internal/driver/driver_test.go View File

18
 	"bytes"
18
 	"bytes"
19
 	"fmt"
19
 	"fmt"
20
 	"io/ioutil"
20
 	"io/ioutil"
21
+	"os"
21
 	"regexp"
22
 	"regexp"
22
 	"strconv"
23
 	"strconv"
23
 	"strings"
24
 	"strings"
34
 	// Override weblist command to collect output in buffer
35
 	// Override weblist command to collect output in buffer
35
 	pprofCommands["weblist"].postProcess = nil
36
 	pprofCommands["weblist"].postProcess = nil
36
 
37
 
38
+	// Our mockObjTool.Open will always return success, causing
39
+	// driver.locateBinaries to "find" the binaries below in a non-existant
40
+	// directory. As a workaround, point the search path to the fake
41
+	// directory containing out fake binaries.
42
+	savePath := os.Getenv("PPROF_BINARY_PATH")
43
+	os.Setenv("PPROF_BINARY_PATH", "/path/to")
44
+	defer os.Setenv("PPROF_BINARY_PATH", savePath)
45
+
37
 	testcase := []struct {
46
 	testcase := []struct {
38
 		flags, source string
47
 		flags, source string
39
 	}{
48
 	}{
445
 			ID:              1,
454
 			ID:              1,
446
 			Start:           0x1000,
455
 			Start:           0x1000,
447
 			Limit:           0x4000,
456
 			Limit:           0x4000,
448
-			File:            "testbinary",
457
+			File:            "/path/to/testbinary",
449
 			HasFunctions:    true,
458
 			HasFunctions:    true,
450
 			HasFilenames:    true,
459
 			HasFilenames:    true,
451
 			HasLineNumbers:  true,
460
 			HasLineNumbers:  true,
564
 			ID:              1,
573
 			ID:              1,
565
 			Start:           0x1000,
574
 			Start:           0x1000,
566
 			Limit:           0x4000,
575
 			Limit:           0x4000,
567
-			File:            "testbinary",
576
+			File:            "/path/to/testbinary",
568
 			HasFunctions:    true,
577
 			HasFunctions:    true,
569
 			HasFilenames:    true,
578
 			HasFilenames:    true,
570
 			HasLineNumbers:  true,
579
 			HasLineNumbers:  true,
865
 			ID:    1,
874
 			ID:    1,
866
 			Start: testStart,
875
 			Start: testStart,
867
 			Limit: 0x4000,
876
 			Limit: 0x4000,
868
-			File:  "testbinary",
877
+			File:  "/path/to/testbinary",
869
 		},
878
 		},
870
 	}
879
 	}
871
 
880
 

+ 3
- 2
internal/driver/interactive_test.go View File

266
 				"tagignore": "ignore1|ignore2",
266
 				"tagignore": "ignore1|ignore2",
267
 			},
267
 			},
268
 		},
268
 		},
269
-		{"weblist  find -test",
269
+		{
270
+			"weblist  find -test",
270
 			map[string]string{
271
 			map[string]string{
271
 				"functions":        "false",
272
 				"functions":        "false",
272
 				"addressnoinlines": "true",
273
 				"addressnoinlines": "true",
280
 			"callgrind   fun -ignore  >out",
281
 			"callgrind   fun -ignore  >out",
281
 			map[string]string{
282
 			map[string]string{
282
 				"functions": "false",
283
 				"functions": "false",
283
-				"lines":     "true",
284
+				"addresses": "true",
284
 				"nodecount": "0",
285
 				"nodecount": "0",
285
 				"cum":       "false",
286
 				"cum":       "false",
286
 				"flat":      "true",
287
 				"flat":      "true",

+ 46
- 35
internal/driver/testdata/pprof.cpu.callgrind View File

1
+positions: instr line
1
 events: cpu(ms)
2
 events: cpu(ms)
3
+
4
+ob=(1) /path/to/testbinary
2
 fl=(1) testdata/file1000.src
5
 fl=(1) testdata/file1000.src
3
 fn=(1) line1000
6
 fn=(1) line1000
4
-1 1100
7
+0x1000 1 1100
5
 
8
 
9
+ob=(1)
6
 fl=(2) testdata/file2000.src
10
 fl=(2) testdata/file2000.src
7
 fn=(2) line2001
11
 fn=(2) line2001
8
-9 10
12
++4096 9 10
9
 cfl=(1)
13
 cfl=(1)
10
 cfn=(1)
14
 cfn=(1)
11
-calls=0 1
12
-9 1000
15
+calls=0 * 1
16
+* * 1000
13
 
17
 
18
+ob=(1)
14
 fl=(3) testdata/file3000.src
19
 fl=(3) testdata/file3000.src
15
 fn=(3) line3002
20
 fn=(3) line3002
16
-2 10
21
++4096 2 10
17
 cfl=(2)
22
 cfl=(2)
18
 cfn=(4) line2000
23
 cfn=(4) line2000
19
-calls=0 4
20
-2 1000
24
+calls=0 * 4
25
+* * 1000
21
 
26
 
27
+ob=(1)
22
 fl=(2)
28
 fl=(2)
23
 fn=(4)
29
 fn=(4)
24
-4 0
30
+-4096 4 0
25
 cfl=(2)
31
 cfl=(2)
26
 cfn=(2)
32
 cfn=(2)
27
-calls=0 9
28
-4 1010
33
+calls=0 -4096 9
34
+* * 1010
29
 
35
 
36
+ob=(1)
30
 fl=(3)
37
 fl=(3)
31
 fn=(5) line3000
38
 fn=(5) line3000
32
-6 0
39
++4096 6 0
33
 cfl=(3)
40
 cfl=(3)
34
 cfn=(6) line3001
41
 cfn=(6) line3001
35
-calls=0 5
36
-6 1010
42
+calls=0 +4096 5
43
+* * 1010
37
 
44
 
45
+ob=(1)
38
 fl=(3)
46
 fl=(3)
39
-fn=(5)
40
-7 0
47
+fn=(6)
48
+* 5 0
41
 cfl=(3)
49
 cfl=(3)
42
 cfn=(3)
50
 cfn=(3)
43
-calls=0 5
44
-7 10
51
+calls=0 * 2
52
+* * 1010
45
 
53
 
54
+ob=(1)
46
 fl=(3)
55
 fl=(3)
47
 fn=(5)
56
 fn=(5)
48
-9 0
57
++1 9 0
49
 cfl=(3)
58
 cfl=(3)
50
 cfn=(6)
59
 cfn=(6)
51
-calls=0 8
52
-9 100
53
-
54
-fl=(3)
55
-fn=(6)
56
-5 0
57
-cfl=(3)
58
-cfn=(3)
59
-calls=0 2
60
-5 1010
60
+calls=0 +1 8
61
+* * 100
61
 
62
 
63
+ob=(1)
62
 fl=(3)
64
 fl=(3)
63
 fn=(6)
65
 fn=(6)
64
-8 0
66
+* 8 0
65
 cfl=(1)
67
 cfl=(1)
66
 cfn=(1)
68
 cfn=(1)
67
-calls=0 1
68
-8 100
69
+calls=0 -8193 1
70
+* * 100
69
 
71
 
72
+ob=(1)
73
+fl=(3)
74
+fn=(5)
75
++1 7 0
76
+cfl=(3)
77
+cfn=(3)
78
+calls=0 +1 5
79
+* * 10
80
+
81
+ob=(1)
70
 fl=(3)
82
 fl=(3)
71
 fn=(3)
83
 fn=(3)
72
-5 0
84
+* 5 0
73
 cfl=(2)
85
 cfl=(2)
74
 cfn=(4)
86
 cfn=(4)
75
-calls=0 4
76
-5 10
77
-
87
+calls=0 -4098 4
88
+* * 10

+ 57
- 22
internal/driver/testdata/pprof.heap.callgrind View File

1
+positions: instr line
1
 events: inuse_space(MB)
2
 events: inuse_space(MB)
3
+
4
+ob=
2
 fl=(1) testdata/file2000.src
5
 fl=(1) testdata/file2000.src
3
 fn=(1) line2001
6
 fn=(1) line2001
4
-2 62
7
+0x2000 2 62
5
 cfl=(2) testdata/file1000.src
8
 cfl=(2) testdata/file1000.src
6
 cfn=(2) line1000
9
 cfn=(2) line1000
7
-calls=0 1
8
-2 0
10
+calls=0 0x1000 1
11
+* * 0
9
 
12
 
13
+ob=
10
 fl=(3) testdata/file3000.src
14
 fl=(3) testdata/file3000.src
11
 fn=(3) line3002
15
 fn=(3) line3002
12
-3 31
16
++4096 3 31
13
 cfl=(1)
17
 cfl=(1)
14
 cfn=(4) line2000
18
 cfn=(4) line2000
15
-calls=0 3
16
-3 63
19
+calls=0 * 3
20
+* * 0
17
 
21
 
22
+ob=
18
 fl=(2)
23
 fl=(2)
19
 fn=(2)
24
 fn=(2)
20
-1 4
25
+-8192 1 4
21
 
26
 
27
+ob=
22
 fl=(1)
28
 fl=(1)
23
 fn=(4)
29
 fn=(4)
24
-3 0
30
++4096 3 0
25
 cfl=(1)
31
 cfl=(1)
26
 cfn=(1)
32
 cfn=(1)
27
-calls=0 2
28
-3 63
33
+calls=0 +4096 2
34
+* * 63
29
 
35
 
36
+ob=
30
 fl=(3)
37
 fl=(3)
31
 fn=(5) line3000
38
 fn=(5) line3000
32
-4 0
33
-cfl=(3)
34
-cfn=(3)
35
-calls=0 3
36
-4 62
39
++4096 4 0
37
 cfl=(3)
40
 cfl=(3)
38
 cfn=(6) line3001
41
 cfn=(6) line3001
39
-calls=0 2
40
-4 36
42
+calls=0 +4096 2
43
+* * 32
41
 
44
 
45
+ob=
42
 fl=(3)
46
 fl=(3)
43
 fn=(6)
47
 fn=(6)
44
-2 0
48
+* 2 0
45
 cfl=(3)
49
 cfl=(3)
46
 cfn=(3)
50
 cfn=(3)
47
-calls=0 3
48
-2 32
51
+calls=0 * 3
52
+* * 32
53
+
54
+ob=
55
+fl=(3)
56
+fn=(5)
57
++1 4 0
58
+cfl=(3)
59
+cfn=(6)
60
+calls=0 +1 2
61
+* * 3
62
+
63
+ob=
64
+fl=(3)
65
+fn=(6)
66
+* 2 0
49
 cfl=(2)
67
 cfl=(2)
50
 cfn=(2)
68
 cfn=(2)
51
-calls=0 1
52
-2 3
69
+calls=0 -8193 1
70
+* * 3
53
 
71
 
72
+ob=
73
+fl=(3)
74
+fn=(5)
75
++1 4 0
76
+cfl=(3)
77
+cfn=(3)
78
+calls=0 +1 3
79
+* * 62
80
+
81
+ob=
82
+fl=(3)
83
+fn=(3)
84
+* 3 0
85
+cfl=(1)
86
+cfn=(4)
87
+calls=0 -4098 3
88
+* * 62

+ 2
- 2
internal/graph/graph.go View File

138
 		// User requested function name. It was already included.
138
 		// User requested function name. It was already included.
139
 	case i.Objfile != "":
139
 	case i.Objfile != "":
140
 		// Only binary name is available
140
 		// Only binary name is available
141
-		name = append(name, "["+i.Objfile+"]")
141
+		name = append(name, "["+filepath.Base(i.Objfile)+"]")
142
 	default:
142
 	default:
143
 		// Do not leave it empty if there is no information at all.
143
 		// Do not leave it empty if there is no information at all.
144
 		name = append(name, "<unknown>")
144
 		name = append(name, "<unknown>")
481
 func (nm NodeMap) findOrInsertLine(l *profile.Location, li profile.Line, o *Options) *Node {
481
 func (nm NodeMap) findOrInsertLine(l *profile.Location, li profile.Line, o *Options) *Node {
482
 	var objfile string
482
 	var objfile string
483
 	if m := l.Mapping; m != nil && m.File != "" {
483
 	if m := l.Mapping; m != nil && m.File != "" {
484
-		objfile = filepath.Base(m.File)
484
+		objfile = m.File
485
 	}
485
 	}
486
 
486
 
487
 	if ni := nodeInfo(l, li, objfile, o); ni != nil {
487
 	if ni := nodeInfo(l, li, objfile, o); ni != nil {

+ 52
- 8
internal/report/report.go View File

211
 	// Only keep binary names for disassembly-based reports, otherwise
211
 	// Only keep binary names for disassembly-based reports, otherwise
212
 	// remove it to allow merging of functions across binaries.
212
 	// remove it to allow merging of functions across binaries.
213
 	switch o.OutputFormat {
213
 	switch o.OutputFormat {
214
-	case Raw, List, WebList, Dis:
214
+	case Raw, List, WebList, Dis, Callgrind:
215
 		gopt.ObjNames = true
215
 		gopt.ObjNames = true
216
 	}
216
 	}
217
 
217
 
340
 	// Walk all mappings looking for matching functions with samples.
340
 	// Walk all mappings looking for matching functions with samples.
341
 	var objSyms []*objSymbol
341
 	var objSyms []*objSymbol
342
 	for _, m := range prof.Mapping {
342
 	for _, m := range prof.Mapping {
343
-		if !hasSamples[filepath.Base(m.File)] {
343
+		if !hasSamples[m.File] {
344
 			if address == nil || !(m.Start <= *address && *address <= m.Limit) {
344
 			if address == nil || !(m.Start <= *address && *address <= m.Limit) {
345
 				continue
345
 				continue
346
 			}
346
 			}
628
 	rpt.selectOutputUnit(g)
628
 	rpt.selectOutputUnit(g)
629
 
629
 
630
 	nodeNames := getDisambiguatedNames(g)
630
 	nodeNames := getDisambiguatedNames(g)
631
+
632
+	fmt.Fprintln(w, "positions: instr line")
631
 	fmt.Fprintln(w, "events:", o.SampleType+"("+o.OutputUnit+")")
633
 	fmt.Fprintln(w, "events:", o.SampleType+"("+o.OutputUnit+")")
632
 
634
 
635
+	objfiles := make(map[string]int)
633
 	files := make(map[string]int)
636
 	files := make(map[string]int)
634
 	names := make(map[string]int)
637
 	names := make(map[string]int)
638
+
639
+	// prevInfo points to the previous NodeInfo.
640
+	// It is used to group cost lines together as much as possible.
641
+	var prevInfo *graph.NodeInfo
635
 	for _, n := range g.Nodes {
642
 	for _, n := range g.Nodes {
636
-		fmt.Fprintln(w, "fl="+callgrindName(files, n.Info.File))
637
-		fmt.Fprintln(w, "fn="+callgrindName(names, nodeNames[n]))
643
+		if prevInfo == nil || n.Info.Objfile != prevInfo.Objfile || n.Info.File != prevInfo.File || n.Info.Name != prevInfo.Name {
644
+			fmt.Fprintln(w)
645
+			fmt.Fprintln(w, "ob="+callgrindName(objfiles, n.Info.Objfile))
646
+			fmt.Fprintln(w, "fl="+callgrindName(files, n.Info.File))
647
+			fmt.Fprintln(w, "fn="+callgrindName(names, n.Info.Name))
648
+		}
649
+
650
+		addr := callgrindAddress(prevInfo, n.Info.Address)
638
 		sv, _ := measurement.Scale(n.Flat, o.SampleUnit, o.OutputUnit)
651
 		sv, _ := measurement.Scale(n.Flat, o.SampleUnit, o.OutputUnit)
639
-		fmt.Fprintf(w, "%d %d\n", n.Info.Lineno, int64(sv))
652
+		fmt.Fprintf(w, "%s %d %d\n", addr, n.Info.Lineno, int64(sv))
640
 
653
 
641
 		// Print outgoing edges.
654
 		// Print outgoing edges.
642
 		for _, out := range n.Out.Sort() {
655
 		for _, out := range n.Out.Sort() {
645
 			fmt.Fprintln(w, "cfl="+callgrindName(files, callee.Info.File))
658
 			fmt.Fprintln(w, "cfl="+callgrindName(files, callee.Info.File))
646
 			fmt.Fprintln(w, "cfn="+callgrindName(names, nodeNames[callee]))
659
 			fmt.Fprintln(w, "cfn="+callgrindName(names, nodeNames[callee]))
647
 			// pprof doesn't have a flat weight for a call, leave as 0.
660
 			// pprof doesn't have a flat weight for a call, leave as 0.
648
-			fmt.Fprintln(w, "calls=0", callee.Info.Lineno)
649
-			fmt.Fprintln(w, n.Info.Lineno, int64(c))
661
+			fmt.Fprintf(w, "calls=0 %s %d\n", callgrindAddress(prevInfo, callee.Info.Address), callee.Info.Lineno)
662
+			// TODO: This address may be in the middle of a call
663
+			// instruction. It would be best to find the beginning
664
+			// of the instruction, but the tools seem to handle
665
+			// this OK.
666
+			fmt.Fprintf(w, "* * %d\n", int64(c))
650
 		}
667
 		}
651
-		fmt.Fprintln(w)
668
+
669
+		prevInfo = &n.Info
652
 	}
670
 	}
653
 
671
 
654
 	return nil
672
 	return nil
712
 	return fmt.Sprintf("(%d) %s", id, name)
730
 	return fmt.Sprintf("(%d) %s", id, name)
713
 }
731
 }
714
 
732
 
733
+// callgrindAddress implements the callgrind subposition compression scheme if
734
+// possible. If prevInfo != nil, it contains the previous address. The current
735
+// address can be given relative to the previous address, with an explicit +/-
736
+// to indicate it is relative, or * for the same address.
737
+func callgrindAddress(prevInfo *graph.NodeInfo, curr uint64) string {
738
+	abs := fmt.Sprintf("%#x", curr)
739
+	if prevInfo == nil {
740
+		return abs
741
+	}
742
+
743
+	prev := prevInfo.Address
744
+	if prev == curr {
745
+		return "*"
746
+	}
747
+
748
+	diff := int64(curr - prev)
749
+	relative := fmt.Sprintf("%+d", diff)
750
+
751
+	// Only bother to use the relative address if it is actually shorter.
752
+	if len(relative) < len(abs) {
753
+		return relative
754
+	}
755
+
756
+	return abs
757
+}
758
+
715
 // printTree prints a tree-based report in text form.
759
 // printTree prints a tree-based report in text form.
716
 func printTree(w io.Writer, rpt *Report) error {
760
 func printTree(w io.Writer, rpt *Report) error {
717
 	const separator = "----------------------------------------------------------+-------------"
761
 	const separator = "----------------------------------------------------------+-------------"

+ 1
- 1
internal/report/source.go View File

253
 func findMatchingSymbol(objSyms []*objSymbol, ns graph.Nodes) *objSymbol {
253
 func findMatchingSymbol(objSyms []*objSymbol, ns graph.Nodes) *objSymbol {
254
 	for _, n := range ns {
254
 	for _, n := range ns {
255
 		for _, o := range objSyms {
255
 		for _, o := range objSyms {
256
-			if filepath.Base(o.sym.File) == n.Info.Objfile &&
256
+			if filepath.Base(o.sym.File) == filepath.Base(n.Info.Objfile) &&
257
 				o.sym.Start <= n.Info.Address-o.base &&
257
 				o.sym.Start <= n.Info.Address-o.base &&
258
 				n.Info.Address-o.base <= o.sym.End {
258
 				n.Info.Address-o.base <= o.sym.End {
259
 				return o
259
 				return o