瀏覽代碼

Make binutils thread safe. (#231)

* Make binutils thread safe.

binutils is now used from multiple goroutines when the
web interface is enabled (since the HTTP server spins up a new
goroutine for each request). Added synchronization to the
affected objects in binutils so that they are safe for
concurrent use.

In addition, added a bunch of tests:

. Test parsing of nm output.
. Tests for Binutils methods.
. Test llvm-symbolizer interface (using an llvm-symbolizer emulator).

* Fix go vet errors about unkeyed literals.

* Copy stderr to figure out test failures.

* Make shell script portable.

* Update comments to match new var name.

* Give a name to the embedded Mutex field in addr2Liner.
Sanjay Ghemawat 7 年之前
父節點
當前提交
fedd7592f6

+ 16
- 3
internal/binutils/addr2liner.go 查看文件

@@ -21,6 +21,7 @@ import (
21 21
 	"os/exec"
22 22
 	"strconv"
23 23
 	"strings"
24
+	"sync"
24 25
 
25 26
 	"github.com/google/pprof/internal/plugin"
26 27
 )
@@ -36,6 +37,7 @@ const (
36 37
 // addr2Liner is a connection to an addr2line command for obtaining
37 38
 // address and line number information from a binary.
38 39
 type addr2Liner struct {
40
+	mu   sync.Mutex
39 41
 	rw   lineReaderWriter
40 42
 	base uint64
41 43
 
@@ -170,9 +172,10 @@ func (d *addr2Liner) readFrame() (plugin.Frame, bool) {
170 172
 		Line: linenumber}, false
171 173
 }
172 174
 
173
-// addrInfo returns the stack frame information for a specific program
174
-// address. It returns nil if the address could not be identified.
175
-func (d *addr2Liner) addrInfo(addr uint64) ([]plugin.Frame, error) {
175
+func (d *addr2Liner) rawAddrInfo(addr uint64) ([]plugin.Frame, error) {
176
+	d.mu.Lock()
177
+	defer d.mu.Unlock()
178
+
176 179
 	if err := d.rw.write(fmt.Sprintf("%x", addr-d.base)); err != nil {
177 180
 		return nil, err
178 181
 	}
@@ -201,6 +204,16 @@ func (d *addr2Liner) addrInfo(addr uint64) ([]plugin.Frame, error) {
201 204
 			stack = append(stack, frame)
202 205
 		}
203 206
 	}
207
+	return stack, err
208
+}
209
+
210
+// addrInfo returns the stack frame information for a specific program
211
+// address. It returns nil if the address could not be identified.
212
+func (d *addr2Liner) addrInfo(addr uint64) ([]plugin.Frame, error) {
213
+	stack, err := d.rawAddrInfo(addr)
214
+	if err != nil {
215
+		return nil, err
216
+	}
204 217
 
205 218
 	// Get better name from nm if possible.
206 219
 	if len(stack) > 0 && d.nm != nil {

+ 5
- 0
internal/binutils/addr2liner_llvm.go 查看文件

@@ -21,6 +21,7 @@ import (
21 21
 	"os/exec"
22 22
 	"strconv"
23 23
 	"strings"
24
+	"sync"
24 25
 
25 26
 	"github.com/google/pprof/internal/plugin"
26 27
 )
@@ -32,6 +33,7 @@ const (
32 33
 // llvmSymbolizer is a connection to an llvm-symbolizer command for
33 34
 // obtaining address and line number information from a binary.
34 35
 type llvmSymbolizer struct {
36
+	sync.Mutex
35 37
 	filename string
36 38
 	rw       lineReaderWriter
37 39
 	base     uint64
@@ -150,6 +152,9 @@ func (d *llvmSymbolizer) readFrame() (plugin.Frame, bool) {
150 152
 // addrInfo returns the stack frame information for a specific program
151 153
 // address. It returns nil if the address could not be identified.
152 154
 func (d *llvmSymbolizer) addrInfo(addr uint64) ([]plugin.Frame, error) {
155
+	d.Lock()
156
+	defer d.Unlock()
157
+
153 158
 	if err := d.rw.write(fmt.Sprintf("%s 0x%x", d.filename, addr-d.base)); err != nil {
154 159
 		return nil, err
155 160
 	}

+ 8
- 7
internal/binutils/addr2liner_nm.go 查看文件

@@ -48,22 +48,23 @@ func newAddr2LinerNM(cmd, file string, base uint64) (*addr2LinerNM, error) {
48 48
 	if cmd == "" {
49 49
 		cmd = defaultNM
50 50
 	}
51
-
52
-	a := &addr2LinerNM{
53
-		m: []symbolInfo{},
54
-	}
55
-
56 51
 	var b bytes.Buffer
57 52
 	c := exec.Command(cmd, "-n", file)
58 53
 	c.Stdout = &b
59
-
60 54
 	if err := c.Run(); err != nil {
61 55
 		return nil, err
62 56
 	}
57
+	return parseAddr2LinerNM(base, &b)
58
+}
59
+
60
+func parseAddr2LinerNM(base uint64, nm io.Reader) (*addr2LinerNM, error) {
61
+	a := &addr2LinerNM{
62
+		m: []symbolInfo{},
63
+	}
63 64
 
64 65
 	// Parse nm output and populate symbol map.
65 66
 	// Skip lines we fail to parse.
66
-	buf := bufio.NewReader(&b)
67
+	buf := bufio.NewReader(nm)
67 68
 	for {
68 69
 		line, err := buf.ReadString('\n')
69 70
 		if line == "" && err != nil {

+ 59
- 21
internal/binutils/binutils.go 查看文件

@@ -24,14 +24,21 @@ import (
24 24
 	"path/filepath"
25 25
 	"regexp"
26 26
 	"strings"
27
+	"sync"
27 28
 
28 29
 	"github.com/google/pprof/internal/elfexec"
29 30
 	"github.com/google/pprof/internal/plugin"
30 31
 )
31 32
 
32 33
 // A Binutils implements plugin.ObjTool by invoking the GNU binutils.
33
-// SetConfig must be called before any of the other methods.
34 34
 type Binutils struct {
35
+	sync.Mutex
36
+	rep *binrep
37
+}
38
+
39
+// binrep is an immutable representation for Binutils.  It is atomically
40
+// replaced on every mutation to provide thread-safe access.
41
+type binrep struct {
35 42
 	// Commands to invoke.
36 43
 	llvmSymbolizer      string
37 44
 	llvmSymbolizerFound bool
@@ -47,11 +54,38 @@ type Binutils struct {
47 54
 	fast bool
48 55
 }
49 56
 
57
+// get returns the current representation for bu, initializing it if necessary.
58
+func (bu *Binutils) get() *binrep {
59
+	bu.Mutex.Lock()
60
+	r := bu.rep
61
+	if r == nil {
62
+		r = &binrep{}
63
+		initTools(r, "")
64
+		bu.rep = r
65
+	}
66
+	bu.Mutex.Unlock()
67
+	return r
68
+}
69
+
70
+// update modifies the rep for bu via the supplied function.
71
+func (bu *Binutils) update(fn func(r *binrep)) {
72
+	r := &binrep{}
73
+	bu.Mutex.Lock()
74
+	defer bu.Mutex.Unlock()
75
+	if bu.rep == nil {
76
+		initTools(r, "")
77
+	} else {
78
+		*r = *bu.rep
79
+	}
80
+	fn(r)
81
+	bu.rep = r
82
+}
83
+
50 84
 // SetFastSymbolization sets a toggle that makes binutils use fast
51 85
 // symbolization (using nm), which is much faster than addr2line but
52 86
 // provides only symbol name information (no file/line).
53
-func (b *Binutils) SetFastSymbolization(fast bool) {
54
-	b.fast = fast
87
+func (bu *Binutils) SetFastSymbolization(fast bool) {
88
+	bu.update(func(r *binrep) { r.fast = fast })
55 89
 }
56 90
 
57 91
 // SetTools processes the contents of the tools option. It
@@ -59,7 +93,11 @@ func (b *Binutils) SetFastSymbolization(fast bool) {
59 93
 // of the form t:path, where cmd will be used to look only for the
60 94
 // tool named t. If t is not specified, the path is searched for all
61 95
 // tools.
62
-func (b *Binutils) SetTools(config string) {
96
+func (bu *Binutils) SetTools(config string) {
97
+	bu.update(func(r *binrep) { initTools(r, config) })
98
+}
99
+
100
+func initTools(b *binrep, config string) {
63 101
 	// paths collect paths per tool; Key "" contains the default.
64 102
 	paths := make(map[string][]string)
65 103
 	for _, t := range strings.Split(config, ",") {
@@ -91,11 +129,8 @@ func findExe(cmd string, paths []string) (string, bool) {
91 129
 
92 130
 // Disasm returns the assembly instructions for the specified address range
93 131
 // of a binary.
94
-func (b *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
95
-	if b.addr2line == "" {
96
-		// Update the command invocations if not initialized.
97
-		b.SetTools("")
98
-	}
132
+func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
133
+	b := bu.get()
99 134
 	cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
100 135
 		fmt.Sprintf("--start-address=%#x", start),
101 136
 		fmt.Sprintf("--stop-address=%#x", end),
@@ -109,11 +144,8 @@ func (b *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error)
109 144
 }
110 145
 
111 146
 // Open satisfies the plugin.ObjTool interface.
112
-func (b *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
113
-	if b.addr2line == "" {
114
-		// Update the command invocations if not initialized.
115
-		b.SetTools("")
116
-	}
147
+func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
148
+	b := bu.get()
117 149
 
118 150
 	// Make sure file is a supported executable.
119 151
 	// The pprof driver uses Open to sniff the difference
@@ -140,7 +172,7 @@ func (b *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFil
140 172
 	return nil, fmt.Errorf("unrecognized binary: %s", name)
141 173
 }
142 174
 
143
-func (b *Binutils) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
175
+func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
144 176
 	of, err := macho.Open(name)
145 177
 	if err != nil {
146 178
 		return nil, fmt.Errorf("Parsing %s: %v", name, err)
@@ -153,7 +185,7 @@ func (b *Binutils) openMachO(name string, start, limit, offset uint64) (plugin.O
153 185
 	return &fileAddr2Line{file: file{b: b, name: name}}, nil
154 186
 }
155 187
 
156
-func (b *Binutils) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
188
+func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
157 189
 	ef, err := elf.Open(name)
158 190
 	if err != nil {
159 191
 		return nil, fmt.Errorf("Parsing %s: %v", name, err)
@@ -202,7 +234,7 @@ func (b *Binutils) openELF(name string, start, limit, offset uint64) (plugin.Obj
202 234
 
203 235
 // file implements the binutils.ObjFile interface.
204 236
 type file struct {
205
-	b       *Binutils
237
+	b       *binrep
206 238
 	name    string
207 239
 	base    uint64
208 240
 	buildID string
@@ -263,22 +295,27 @@ func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
263 295
 // information). It can be slow for large binaries with debug
264 296
 // information.
265 297
 type fileAddr2Line struct {
298
+	once sync.Once
266 299
 	file
267 300
 	addr2liner     *addr2Liner
268 301
 	llvmSymbolizer *llvmSymbolizer
269 302
 }
270 303
 
271 304
 func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
305
+	f.once.Do(f.init)
272 306
 	if f.llvmSymbolizer != nil {
273 307
 		return f.llvmSymbolizer.addrInfo(addr)
274 308
 	}
275 309
 	if f.addr2liner != nil {
276 310
 		return f.addr2liner.addrInfo(addr)
277 311
 	}
312
+	return nil, fmt.Errorf("could not find local addr2liner")
313
+}
278 314
 
315
+func (f *fileAddr2Line) init() {
279 316
 	if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
280 317
 		f.llvmSymbolizer = llvmSymbolizer
281
-		return f.llvmSymbolizer.addrInfo(addr)
318
+		return
282 319
 	}
283 320
 
284 321
 	if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
@@ -290,13 +327,14 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
290 327
 		if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
291 328
 			f.addr2liner.nm = nm
292 329
 		}
293
-		return f.addr2liner.addrInfo(addr)
294 330
 	}
295
-
296
-	return nil, fmt.Errorf("could not find local addr2liner")
297 331
 }
298 332
 
299 333
 func (f *fileAddr2Line) Close() error {
334
+	if f.llvmSymbolizer != nil {
335
+		f.llvmSymbolizer.rw.close()
336
+		f.llvmSymbolizer = nil
337
+	}
300 338
 	if f.addr2liner != nil {
301 339
 		f.addr2liner.rw.close()
302 340
 		f.addr2liner = nil

+ 142
- 19
internal/binutils/binutils_test.go 查看文件

@@ -15,7 +15,13 @@
15 15
 package binutils
16 16
 
17 17
 import (
18
+	"bytes"
18 19
 	"fmt"
20
+	"math"
21
+	"path/filepath"
22
+	"reflect"
23
+	"regexp"
24
+	"runtime"
19 25
 	"testing"
20 26
 
21 27
 	"github.com/google/pprof/internal/plugin"
@@ -37,7 +43,7 @@ func functionName(level int) (name string) {
37 43
 func TestAddr2Liner(t *testing.T) {
38 44
 	const offset = 0x500
39 45
 
40
-	a := addr2Liner{&mockAddr2liner{}, offset, nil}
46
+	a := addr2Liner{rw: &mockAddr2liner{}, base: offset}
41 47
 	for i := 1; i < 8; i++ {
42 48
 		addr := i*0x1000 + offset
43 49
 		s, err := a.addrInfo(uint64(addr))
@@ -112,24 +118,23 @@ func (a *mockAddr2liner) close() {
112 118
 }
113 119
 
114 120
 func TestAddr2LinerLookup(t *testing.T) {
115
-	oddSizedMap := addr2LinerNM{
116
-		m: []symbolInfo{
117
-			{0x1000, "0x1000"},
118
-			{0x2000, "0x2000"},
119
-			{0x3000, "0x3000"},
120
-		},
121
-	}
122
-	evenSizedMap := addr2LinerNM{
123
-		m: []symbolInfo{
124
-			{0x1000, "0x1000"},
125
-			{0x2000, "0x2000"},
126
-			{0x3000, "0x3000"},
127
-			{0x4000, "0x4000"},
128
-		},
129
-	}
130
-	for _, a := range []*addr2LinerNM{
131
-		&oddSizedMap, &evenSizedMap,
132
-	} {
121
+	const oddSizedData = `
122
+00001000 T 0x1000
123
+00002000 T 0x2000
124
+00003000 T 0x3000
125
+`
126
+	const evenSizedData = `
127
+0000000000001000 T 0x1000
128
+0000000000002000 T 0x2000
129
+0000000000003000 T 0x3000
130
+0000000000004000 T 0x4000
131
+`
132
+	for _, d := range []string{oddSizedData, evenSizedData} {
133
+		a, err := parseAddr2LinerNM(0, bytes.NewBufferString(d))
134
+		if err != nil {
135
+			t.Errorf("nm parse error: %v", err)
136
+			continue
137
+		}
133 138
 		for address, want := range map[uint64]string{
134 139
 			0x1000: "0x1000",
135 140
 			0x1001: "0x1000",
@@ -141,6 +146,11 @@ func TestAddr2LinerLookup(t *testing.T) {
141 146
 				t.Errorf("%x: got %v, want %s", address, got, want)
142 147
 			}
143 148
 		}
149
+		for _, unknown := range []uint64{0x0fff, 0x4001} {
150
+			if got, _ := a.addrInfo(unknown); got != nil {
151
+				t.Errorf("%x: got %v, want nil", unknown, got)
152
+			}
153
+		}
144 154
 	}
145 155
 }
146 156
 
@@ -150,3 +160,116 @@ func checkAddress(got []plugin.Frame, address uint64, want string) bool {
150 160
 	}
151 161
 	return got[0].Func == want
152 162
 }
163
+
164
+func TestSetTools(t *testing.T) {
165
+	// Test that multiple calls work.
166
+	bu := &Binutils{}
167
+	bu.SetTools("")
168
+	bu.SetTools("")
169
+}
170
+
171
+func TestSetFastSymbolization(t *testing.T) {
172
+	// Test that multiple calls work.
173
+	bu := &Binutils{}
174
+	bu.SetFastSymbolization(true)
175
+	bu.SetFastSymbolization(false)
176
+}
177
+
178
+func skipUnlessLinuxAmd64(t *testing.T) {
179
+	if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
180
+		t.Skip("Disasm only tested on x86-64 linux")
181
+	}
182
+}
183
+
184
+func TestDisasm(t *testing.T) {
185
+	skipUnlessLinuxAmd64(t)
186
+	bu := &Binutils{}
187
+	insts, err := bu.Disasm(filepath.Join("testdata", "hello"), 0, math.MaxUint64)
188
+	if err != nil {
189
+		t.Fatalf("Disasm: unexpected error %v", err)
190
+	}
191
+	mainCount := 0
192
+	for _, x := range insts {
193
+		if x.Function == "main" {
194
+			mainCount++
195
+		}
196
+	}
197
+	if mainCount == 0 {
198
+		t.Error("Disasm: found no main instructions")
199
+	}
200
+}
201
+
202
+func TestObjFile(t *testing.T) {
203
+	skipUnlessLinuxAmd64(t)
204
+	bu := &Binutils{}
205
+	f, err := bu.Open(filepath.Join("testdata", "hello"), 0, math.MaxUint64, 0)
206
+	if err != nil {
207
+		t.Fatalf("Open: unexpected error %v", err)
208
+	}
209
+	defer f.Close()
210
+	syms, err := f.Symbols(regexp.MustCompile("main"), 0)
211
+	if err != nil {
212
+		t.Fatalf("Symbols: unexpected error %v", err)
213
+	}
214
+
215
+	find := func(name string) *plugin.Sym {
216
+		for _, s := range syms {
217
+			for _, n := range s.Name {
218
+				if n == name {
219
+					return s
220
+				}
221
+			}
222
+		}
223
+		return nil
224
+	}
225
+	m := find("main")
226
+	if m == nil {
227
+		t.Fatalf("Symbols: did not find main")
228
+	}
229
+	frames, err := f.SourceLine(m.Start)
230
+	if err != nil {
231
+		t.Fatalf("SourceLine: unexpected error %v", err)
232
+	}
233
+	expect := []plugin.Frame{
234
+		{Func: "main", File: "/tmp/hello.c", Line: 3},
235
+	}
236
+	if !reflect.DeepEqual(frames, expect) {
237
+		t.Fatalf("SourceLine for main: expect %v; got %v\n", expect, frames)
238
+	}
239
+}
240
+
241
+func TestLLVMSymbolizer(t *testing.T) {
242
+	if runtime.GOOS != "linux" {
243
+		t.Skip("testtdata/llvm-symbolizer has only been tested on linux")
244
+	}
245
+
246
+	cmd := filepath.Join("testdata", "fake-llvm-symbolizer")
247
+	symbolizer, err := newLLVMSymbolizer(cmd, "foo", 0)
248
+	if err != nil {
249
+		t.Fatalf("newLLVMSymbolizer: unexpected error %v", err)
250
+	}
251
+	defer symbolizer.rw.close()
252
+
253
+	for _, c := range []struct {
254
+		addr   uint64
255
+		frames []plugin.Frame
256
+	}{
257
+		{0x10, []plugin.Frame{
258
+			{Func: "Inlined_0x10", File: "foo.h", Line: 0},
259
+			{Func: "Func_0x10", File: "foo.c", Line: 2},
260
+		}},
261
+		{0x20, []plugin.Frame{
262
+			{Func: "Inlined_0x20", File: "foo.h", Line: 0},
263
+			{Func: "Func_0x20", File: "foo.c", Line: 2},
264
+		}},
265
+	} {
266
+		frames, err := symbolizer.addrInfo(c.addr)
267
+		if err != nil {
268
+			t.Errorf("LLVM: unexpected error %v", err)
269
+			continue
270
+		}
271
+		if !reflect.DeepEqual(frames, c.frames) {
272
+			t.Errorf("LLVM: expect %v; got %v\n", c.frames, frames)
273
+		}
274
+	}
275
+}

+ 34
- 0
internal/binutils/testdata/fake-llvm-symbolizer 查看文件

@@ -0,0 +1,34 @@
1
+#!/bin/sh
2
+#
3
+# Copyright 2014 Google Inc. All Rights Reserved.
4
+#
5
+# Licensed under the Apache License, Version 2.0 (the "License");
6
+# you may not use this file except in compliance with the License.
7
+# You may obtain a copy of the License at
8
+#
9
+#     http://www.apache.org/licenses/LICENSE-2.0
10
+#
11
+# Unless required by applicable law or agreed to in writing, software
12
+# distributed under the License is distributed on an "AS IS" BASIS,
13
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+# See the License for the specific language governing permissions and
15
+# limitations under the License.
16
+#
17
+# Fake llvm-symbolizer to use in tests
18
+
19
+set -f
20
+IFS=" "
21
+
22
+while read line; do
23
+  # line has form:
24
+  #    filename 0xaddr
25
+  # Emit dummy output that matches llvm-symbolizer output format.
26
+  set -- $line
27
+  fname=$1
28
+  addr=$2
29
+  echo "Inlined_$addr"
30
+  echo "$fname.h"
31
+  echo "Func_$addr"
32
+  echo "$fname.c:2"
33
+  echo
34
+done

二進制
internal/binutils/testdata/hello 查看文件