Sfoglia il codice sorgente

Add support for LLVM objdump (#534)

Add support for LLVM objdump.
Kalyana Chadalavada 4 anni fa
parent
commit
03e1cf38a0
No account linked to committer's email address

+ 83
- 5
internal/binutils/binutils.go Vedi File

19
 	"debug/elf"
19
 	"debug/elf"
20
 	"debug/macho"
20
 	"debug/macho"
21
 	"encoding/binary"
21
 	"encoding/binary"
22
+	"errors"
22
 	"fmt"
23
 	"fmt"
23
 	"io"
24
 	"io"
24
 	"os"
25
 	"os"
26
 	"path/filepath"
27
 	"path/filepath"
27
 	"regexp"
28
 	"regexp"
28
 	"runtime"
29
 	"runtime"
30
+	"strconv"
29
 	"strings"
31
 	"strings"
30
 	"sync"
32
 	"sync"
31
 
33
 
39
 	rep *binrep
41
 	rep *binrep
40
 }
42
 }
41
 
43
 
44
+var objdumpLLVMVerRE = regexp.MustCompile(`LLVM version (?:(\d*)\.(\d*)\.(\d*)|.*(trunk).*)`)
45
+
42
 // binrep is an immutable representation for Binutils.  It is atomically
46
 // binrep is an immutable representation for Binutils.  It is atomically
43
 // replaced on every mutation to provide thread-safe access.
47
 // replaced on every mutation to provide thread-safe access.
44
 type binrep struct {
48
 type binrep struct {
51
 	nmFound             bool
55
 	nmFound             bool
52
 	objdump             string
56
 	objdump             string
53
 	objdumpFound        bool
57
 	objdumpFound        bool
58
+	isLLVMObjdump       bool
54
 
59
 
55
 	// if fast, perform symbolization using nm (symbol names only),
60
 	// if fast, perform symbolization using nm (symbol names only),
56
 	// instead of file-line detail from the slower addr2line.
61
 	// instead of file-line detail from the slower addr2line.
140
 		b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
145
 		b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
141
 	}
146
 	}
142
 	b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
147
 	b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
143
-	b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
148
+	b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...))
149
+}
150
+
151
+// findObjdump finds and returns path to preferred objdump binary.
152
+// Order of preference is: llvm-objdump, objdump.
153
+// On MacOS only, also looks for gobjdump with least preference.
154
+// Accepts a list of paths and returns:
155
+// a string with path to the preferred objdump binary if found,
156
+// or an empty string if not found;
157
+// a boolean indicating whether any acceptable objdump was found;
158
+// a boolean indicating if it is an LLVM objdump.
159
+func findObjdump(paths []string) (string, bool, bool) {
160
+	objdumpNames := []string{"llvm-objdump", "objdump"}
161
+	if runtime.GOOS == "darwin" {
162
+		objdumpNames = append(objdumpNames, "gobjdump")
163
+	}
164
+
165
+	for _, objdumpName := range objdumpNames {
166
+		if objdump, objdumpFound := findExe(objdumpName, paths); objdumpFound {
167
+			cmdOut, err := exec.Command(objdump, "--version").Output()
168
+			if err != nil {
169
+				continue
170
+			}
171
+			if isLLVMObjdump(string(cmdOut)) {
172
+				return objdump, true, true
173
+			}
174
+			if isBuObjdump(string(cmdOut)) {
175
+				return objdump, true, false
176
+			}
177
+		}
178
+	}
179
+	return "", false, false
180
+}
181
+
182
+// isLLVMObjdump accepts the --version output of an objdump binary,
183
+// and returns a boolean indicating if the given binary is an LLVM
184
+// objdump binary of an acceptable version.
185
+func isLLVMObjdump(output string) bool {
186
+	fields := objdumpLLVMVerRE.FindStringSubmatch(output)
187
+	if len(fields) != 5 {
188
+		return false
189
+	}
190
+	if fields[4] == "trunk" {
191
+		return true
192
+	}
193
+	verMajor, err := strconv.Atoi(fields[1])
194
+	if err != nil {
195
+		return false
196
+	}
197
+	verPatch, err := strconv.Atoi(fields[3])
198
+	if err != nil {
199
+		return false
200
+	}
201
+	if runtime.GOOS == "linux" && verMajor >= 8 {
202
+		// Ensure LLVM objdump is at least version 8.0 on Linux.
203
+		// Some flags, like --demangle, and double dashes for options are
204
+		// not supported by previous versions.
205
+		return true
206
+	}
207
+	if runtime.GOOS == "darwin" {
208
+		// Ensure LLVM objdump is at least version 10.0.1 on MacOS.
209
+		return verMajor > 10 || (verMajor == 10 && verPatch >= 1)
210
+	}
211
+	return false
212
+}
213
+
214
+// isBuObjdump accepts the --version output of an objdump binary,
215
+// and returns a boolean indicating if the given binary is a GNU
216
+// binutils objdump binary. No version check is performed.
217
+func isBuObjdump(output string) bool {
218
+	return strings.Contains(output, "GNU objdump") && strings.Contains(output, "Binutils")
144
 }
219
 }
145
 
220
 
146
 // findExe looks for an executable command on a set of paths.
221
 // findExe looks for an executable command on a set of paths.
159
 // of a binary.
234
 // of a binary.
160
 func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
235
 func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
161
 	b := bu.get()
236
 	b := bu.get()
162
-	args := []string{"-d", "-C", "--no-show-raw-insn", "-l",
163
-		fmt.Sprintf("--start-address=%#x", start),
237
+	if !b.objdumpFound {
238
+		return nil, errors.New("cannot disasm: no objdump tool available")
239
+	}
240
+	args := []string{"--disassemble-all", "--demangle", "--no-show-raw-insn",
241
+		"--line-numbers", fmt.Sprintf("--start-address=%#x", start),
164
 		fmt.Sprintf("--stop-address=%#x", end)}
242
 		fmt.Sprintf("--stop-address=%#x", end)}
165
 
243
 
166
 	if intelSyntax {
244
 	if intelSyntax {
167
-		if runtime.GOOS == "darwin" {
168
-			args = append(args, "-x86-asm-syntax=intel")
245
+		if b.isLLVMObjdump {
246
+			args = append(args, "--x86-asm-syntax=intel")
169
 		} else {
247
 		} else {
170
 			args = append(args, "-M", "intel")
248
 			args = append(args, "-M", "intel")
171
 		}
249
 		}

+ 105
- 3
internal/binutils/binutils_test.go Vedi File

189
 }
189
 }
190
 
190
 
191
 func testDisasm(t *testing.T, intelSyntax bool) {
191
 func testDisasm(t *testing.T, intelSyntax bool) {
192
+	_, llvmObjdump, buObjdump := findObjdump([]string{""})
193
+	if !(llvmObjdump || buObjdump) {
194
+		t.Skip("cannot disasm: no objdump tool available")
195
+	}
196
+
192
 	bu := &Binutils{}
197
 	bu := &Binutils{}
193
-	insts, err := bu.Disasm(filepath.Join("testdata", "exe_linux_64"), 0, math.MaxUint64, intelSyntax)
198
+	testexe := "exe_linux_64"
199
+	if runtime.GOOS == "darwin" {
200
+		testexe = "exe_mac_64"
201
+	}
202
+
203
+	insts, err := bu.Disasm(filepath.Join("testdata", testexe), 0, math.MaxUint64, intelSyntax)
194
 	if err != nil {
204
 	if err != nil {
195
 		t.Fatalf("Disasm: unexpected error %v", err)
205
 		t.Fatalf("Disasm: unexpected error %v", err)
196
 	}
206
 	}
197
 	mainCount := 0
207
 	mainCount := 0
198
 	for _, x := range insts {
208
 	for _, x := range insts {
199
-		if x.Function == "main" {
209
+		// Mac symbols have a leading underscore.
210
+		if x.Function == "main" || x.Function == "_main" {
200
 			mainCount++
211
 			mainCount++
201
 		}
212
 		}
202
 	}
213
 	}
206
 }
217
 }
207
 
218
 
208
 func TestDisasm(t *testing.T) {
219
 func TestDisasm(t *testing.T) {
209
-	skipUnlessLinuxAmd64(t)
220
+	if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
221
+		t.Skip("This test only works on Linux or Mac")
222
+	}
210
 	testDisasm(t, true)
223
 	testDisasm(t, true)
211
 	testDisasm(t, false)
224
 	testDisasm(t, false)
212
 }
225
 }
401
 		t.Errorf("Open: got %v, want error containing 'Mach-O'", err)
414
 		t.Errorf("Open: got %v, want error containing 'Mach-O'", err)
402
 	}
415
 	}
403
 }
416
 }
417
+
418
+func TestObjdumpVersionChecks(t *testing.T) {
419
+	// Test that the objdump version strings are parsed properly.
420
+	type testcase struct {
421
+		desc string
422
+		os   string
423
+		ver  string
424
+		want bool
425
+	}
426
+
427
+	for _, tc := range []testcase{
428
+		{
429
+			desc: "Valid Apple LLVM version string with usable version",
430
+			os:   "darwin",
431
+			ver:  "Apple LLVM version 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
432
+			want: true,
433
+		},
434
+		{
435
+			desc: "Valid Apple LLVM version string with unusable version",
436
+			os:   "darwin",
437
+			ver:  "Apple LLVM version 10.0.0 (clang-1000.11.45.5)\nOptimized build.",
438
+			want: false,
439
+		},
440
+		{
441
+			desc: "Invalid Apple LLVM version string with usable version",
442
+			os:   "darwin",
443
+			ver:  "Apple LLVM versions 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
444
+			want: false,
445
+		},
446
+		{
447
+			desc: "Valid LLVM version string with usable version",
448
+			os:   "linux",
449
+			ver:  "LLVM (http://llvm.org/):\nLLVM version 9.0.1\n\nOptimized build.",
450
+			want: true,
451
+		},
452
+		{
453
+			desc: "Valid LLVM version string with unusable version",
454
+			os:   "linux",
455
+			ver:  "LLVM (http://llvm.org/):\nLLVM version 6.0.1\n\nOptimized build.",
456
+			want: false,
457
+		},
458
+		{
459
+			desc: "Invalid LLVM version string with usable version",
460
+			os:   "linux",
461
+			ver:  "LLVM (http://llvm.org/):\nLLVM versions 9.0.1\n\nOptimized build.",
462
+			want: false,
463
+		},
464
+		{
465
+			desc: "Valid LLVM objdump version string with trunk",
466
+			os:   runtime.GOOS,
467
+			ver:  "LLVM (http://llvm.org/):\nLLVM version custom-trunk 124ffeb592a00bfe\nOptimized build.",
468
+			want: true,
469
+		},
470
+		{
471
+			desc: "Invalid LLVM objdump version string with trunk",
472
+			os:   runtime.GOOS,
473
+			ver:  "LLVM (http://llvm.org/):\nLLVM version custom-trank 124ffeb592a00bfe\nOptimized build.",
474
+			want: false,
475
+		},
476
+		{
477
+			desc: "Invalid LLVM objdump version string with trunk",
478
+			os:   runtime.GOOS,
479
+			ver:  "LLVM (http://llvm.org/):\nllvm version custom-trunk 124ffeb592a00bfe\nOptimized build.",
480
+			want: false,
481
+		},
482
+	} {
483
+		if runtime.GOOS == tc.os {
484
+			if got := isLLVMObjdump(tc.ver); got != tc.want {
485
+				t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
486
+			}
487
+		}
488
+	}
489
+	for _, tc := range []testcase{
490
+		{
491
+			desc: "Valid GNU objdump version string",
492
+			ver:  "GNU objdump (GNU Binutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
493
+			want: true,
494
+		},
495
+		{
496
+			desc: "Invalid GNU objdump version string",
497
+			ver:  "GNU objdump (GNU Banutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
498
+			want: false,
499
+		},
500
+	} {
501
+		if got := isBuObjdump(tc.ver); got != tc.want {
502
+			t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
503
+		}
504
+	}
505
+}

+ 10
- 4
internal/binutils/disasm.go Vedi File

25
 )
25
 )
26
 
26
 
27
 var (
27
 var (
28
-	nmOutputRE            = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
29
-	objdumpAsmOutputRE    = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
30
-	objdumpOutputFileLine = regexp.MustCompile(`^(.*):([0-9]+)`)
31
-	objdumpOutputFunction = regexp.MustCompile(`^(\S.*)\(\):`)
28
+	nmOutputRE                = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
29
+	objdumpAsmOutputRE        = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
30
+	objdumpOutputFileLine     = regexp.MustCompile(`^;?\s?(.*):([0-9]+)`)
31
+	objdumpOutputFunction     = regexp.MustCompile(`^;?\s?(\S.*)\(\):`)
32
+	objdumpOutputFunctionLLVM = regexp.MustCompile(`^([[:xdigit:]]+)?\s?(.*):`)
32
 )
33
 )
33
 
34
 
34
 func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
35
 func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
143
 		if fields := objdumpOutputFunction.FindStringSubmatch(input); len(fields) == 2 {
144
 		if fields := objdumpOutputFunction.FindStringSubmatch(input); len(fields) == 2 {
144
 			function = fields[1]
145
 			function = fields[1]
145
 			continue
146
 			continue
147
+		} else {
148
+			if fields := objdumpOutputFunctionLLVM.FindStringSubmatch(input); len(fields) == 3 {
149
+				function = fields[2]
150
+				continue
151
+			}
146
 		}
152
 		}
147
 		// Reset on unrecognized lines.
153
 		// Reset on unrecognized lines.
148
 		function, file, line = "", "", 0
154
 		function, file, line = "", "", 0

+ 16
- 8
internal/binutils/disasm_test.go Vedi File

110
 	testcases := []testcase{
110
 	testcases := []testcase{
111
 		{
111
 		{
112
 			plugin.Sym{Name: []string{"symbol1"}, Start: 0x1000, End: 0x1FFF},
112
 			plugin.Sym{Name: []string{"symbol1"}, Start: 0x1000, End: 0x1FFF},
113
-			`  1000: instruction one
114
-  1001: instruction two
115
-  1002: instruction three
116
-  1003: instruction four
117
-`,
113
+			"  1000: instruction one\n  1001: instruction two\n  1002: instruction three\n  1003: instruction four",
118
 			[]plugin.Inst{
114
 			[]plugin.Inst{
119
 				{Addr: 0x1000, Text: "instruction one"},
115
 				{Addr: 0x1000, Text: "instruction one"},
120
 				{Addr: 0x1001, Text: "instruction two"},
116
 				{Addr: 0x1001, Text: "instruction two"},
124
 		},
120
 		},
125
 		{
121
 		{
126
 			plugin.Sym{Name: []string{"symbol2"}, Start: 0x2000, End: 0x2FFF},
122
 			plugin.Sym{Name: []string{"symbol2"}, Start: 0x2000, End: 0x2FFF},
127
-			`  2000: instruction one
128
-  2001: instruction two
129
-`,
123
+			"  2000: instruction one\n  2001: instruction two",
130
 			[]plugin.Inst{
124
 			[]plugin.Inst{
131
 				{Addr: 0x2000, Text: "instruction one"},
125
 				{Addr: 0x2000, Text: "instruction one"},
132
 				{Addr: 0x2001, Text: "instruction two"},
126
 				{Addr: 0x2001, Text: "instruction two"},
133
 			},
127
 			},
134
 		},
128
 		},
129
+		{
130
+			plugin.Sym{Name: []string{"_main"}, Start: 0x30000, End: 0x3FFF},
131
+			"_main:\n; /tmp/hello.c:3\n30001:	push   %rbp",
132
+			[]plugin.Inst{
133
+				{Addr: 0x30001, Text: "push   %rbp", Function: "_main", File: "/tmp/hello.c", Line: 3},
134
+			},
135
+		},
136
+		{
137
+			plugin.Sym{Name: []string{"main"}, Start: 0x4000, End: 0x4FFF},
138
+			"000000000040052d <main>:\nmain():\n/tmp/hello.c:3\n40001:	push   %rbp",
139
+			[]plugin.Inst{
140
+				{Addr: 0x40001, Text: "push   %rbp", Function: "main", File: "/tmp/hello.c", Line: 3},
141
+			},
142
+		},
135
 	}
143
 	}
136
 
144
 
137
 	for _, tc := range testcases {
145
 	for _, tc := range testcases {