Kaynağa Gözat

Add support for LLVM objdump (#534)

Add support for LLVM objdump.
Kalyana Chadalavada 4 yıl önce
ebeveyn
işleme
03e1cf38a0
No account linked to committer's email address

+ 83
- 5
internal/binutils/binutils.go Dosyayı Görüntüle

@@ -19,6 +19,7 @@ import (
19 19
 	"debug/elf"
20 20
 	"debug/macho"
21 21
 	"encoding/binary"
22
+	"errors"
22 23
 	"fmt"
23 24
 	"io"
24 25
 	"os"
@@ -26,6 +27,7 @@ import (
26 27
 	"path/filepath"
27 28
 	"regexp"
28 29
 	"runtime"
30
+	"strconv"
29 31
 	"strings"
30 32
 	"sync"
31 33
 
@@ -39,6 +41,8 @@ type Binutils struct {
39 41
 	rep *binrep
40 42
 }
41 43
 
44
+var objdumpLLVMVerRE = regexp.MustCompile(`LLVM version (?:(\d*)\.(\d*)\.(\d*)|.*(trunk).*)`)
45
+
42 46
 // binrep is an immutable representation for Binutils.  It is atomically
43 47
 // replaced on every mutation to provide thread-safe access.
44 48
 type binrep struct {
@@ -51,6 +55,7 @@ type binrep struct {
51 55
 	nmFound             bool
52 56
 	objdump             string
53 57
 	objdumpFound        bool
58
+	isLLVMObjdump       bool
54 59
 
55 60
 	// if fast, perform symbolization using nm (symbol names only),
56 61
 	// instead of file-line detail from the slower addr2line.
@@ -140,7 +145,77 @@ func initTools(b *binrep, config string) {
140 145
 		b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
141 146
 	}
142 147
 	b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
143
-	b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
148
+	b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...))
149
+}
150
+
151
+// findObjdump finds and returns path to preferred objdump binary.
152
+// Order of preference is: llvm-objdump, objdump.
153
+// On MacOS only, also looks for gobjdump with least preference.
154
+// Accepts a list of paths and returns:
155
+// a string with path to the preferred objdump binary if found,
156
+// or an empty string if not found;
157
+// a boolean indicating if any acceptable objdump was found;
158
+// a boolean indicating if it is an LLVM objdump.
159
+func findObjdump(paths []string) (string, bool, bool) {
160
+	objdumpNames := []string{"llvm-objdump", "objdump"}
161
+	if runtime.GOOS == "darwin" {
162
+		objdumpNames = append(objdumpNames, "gobjdump")
163
+	}
164
+
165
+	for _, objdumpName := range objdumpNames {
166
+		if objdump, objdumpFound := findExe(objdumpName, paths); objdumpFound {
167
+			cmdOut, err := exec.Command(objdump, "--version").Output()
168
+			if err != nil {
169
+				continue
170
+			}
171
+			if isLLVMObjdump(string(cmdOut)) {
172
+				return objdump, true, true
173
+			}
174
+			if isBuObjdump(string(cmdOut)) {
175
+				return objdump, true, false
176
+			}
177
+		}
178
+	}
179
+	return "", false, false
180
+}
181
+
182
+// isLLVMObjdump accepts a string with the output of running an objdump
183
+// binary with --version, and returns a boolean indicating if the output
184
+// identifies an LLVM objdump binary of an acceptable version.
185
+func isLLVMObjdump(output string) bool {
186
+	fields := objdumpLLVMVerRE.FindStringSubmatch(output)
187
+	if len(fields) != 5 {
188
+		return false
189
+	}
190
+	if fields[4] == "trunk" {
191
+		return true
192
+	}
193
+	verMajor, err := strconv.Atoi(fields[1])
194
+	if err != nil {
195
+		return false
196
+	}
197
+	verPatch, err := strconv.Atoi(fields[3])
198
+	if err != nil {
199
+		return false
200
+	}
201
+	if runtime.GOOS == "linux" && verMajor >= 8 {
202
+		// Ensure LLVM objdump is at least version 8.0 on Linux.
203
+		// Some flags, like --demangle, and double dashes for options are
204
+		// not supported by previous versions.
205
+		return true
206
+	}
207
+	if runtime.GOOS == "darwin" {
208
+		// Ensure LLVM objdump is at least version 10.0.1 on MacOS.
209
+		return verMajor > 10 || (verMajor == 10 && verPatch >= 1)
210
+	}
211
+	return false
212
+}
213
+
214
+// isBuObjdump accepts a string with the output of running an objdump
215
+// binary with --version, and returns a boolean indicating if the output
216
+// identifies a GNU binutils objdump binary. No version check is performed.
217
+func isBuObjdump(output string) bool {
218
+	return strings.Contains(output, "GNU objdump") && strings.Contains(output, "Binutils")
144 219
 }
145 220
 
146 221
 // findExe looks for an executable command on a set of paths.
@@ -159,13 +234,16 @@ func findExe(cmd string, paths []string) (string, bool) {
159 234
 // of a binary.
160 235
 func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
161 236
 	b := bu.get()
162
-	args := []string{"-d", "-C", "--no-show-raw-insn", "-l",
163
-		fmt.Sprintf("--start-address=%#x", start),
237
+	if !b.objdumpFound {
238
+		return nil, errors.New("cannot disasm: no objdump tool available")
239
+	}
240
+	args := []string{"--disassemble-all", "--demangle", "--no-show-raw-insn",
241
+		"--line-numbers", fmt.Sprintf("--start-address=%#x", start),
164 242
 		fmt.Sprintf("--stop-address=%#x", end)}
165 243
 
166 244
 	if intelSyntax {
167
-		if runtime.GOOS == "darwin" {
168
-			args = append(args, "-x86-asm-syntax=intel")
245
+		if b.isLLVMObjdump {
246
+			args = append(args, "--x86-asm-syntax=intel")
169 247
 		} else {
170 248
 			args = append(args, "-M", "intel")
171 249
 		}

+ 105
- 3
internal/binutils/binutils_test.go Dosyayı Görüntüle

@@ -189,14 +189,25 @@ func skipUnlessDarwinAmd64(t *testing.T) {
189 189
 }
190 190
 
191 191
 func testDisasm(t *testing.T, intelSyntax bool) {
192
+	_, llvmObjdump, buObjdump := findObjdump([]string{""})
193
+	if !(llvmObjdump || buObjdump) {
194
+		t.Skip("cannot disasm: no objdump tool available")
195
+	}
196
+
192 197
 	bu := &Binutils{}
193
-	insts, err := bu.Disasm(filepath.Join("testdata", "exe_linux_64"), 0, math.MaxUint64, intelSyntax)
198
+	testexe := "exe_linux_64"
199
+	if runtime.GOOS == "darwin" {
200
+		testexe = "exe_mac_64"
201
+	}
202
+
203
+	insts, err := bu.Disasm(filepath.Join("testdata", testexe), 0, math.MaxUint64, intelSyntax)
194 204
 	if err != nil {
195 205
 		t.Fatalf("Disasm: unexpected error %v", err)
196 206
 	}
197 207
 	mainCount := 0
198 208
 	for _, x := range insts {
199
-		if x.Function == "main" {
209
+		// Mac symbols have a leading underscore.
210
+		if x.Function == "main" || x.Function == "_main" {
200 211
 			mainCount++
201 212
 		}
202 213
 	}
@@ -206,7 +217,9 @@ func testDisasm(t *testing.T, intelSyntax bool) {
206 217
 }
207 218
 
208 219
 func TestDisasm(t *testing.T) {
209
-	skipUnlessLinuxAmd64(t)
220
+	if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
221
+		t.Skip("This test only works on Linux or Mac")
222
+	}
210 223
 	testDisasm(t, true)
211 224
 	testDisasm(t, false)
212 225
 }
@@ -401,3 +414,92 @@ func TestOpenMalformedMachO(t *testing.T) {
401 414
 		t.Errorf("Open: got %v, want error containing 'Mach-O'", err)
402 415
 	}
403 416
 }
417
+
418
+func TestObjdumpVersionChecks(t *testing.T) {
419
+	// Test that the objdump version strings are parsed properly.
420
+	type testcase struct {
421
+		desc string
422
+		os   string
423
+		ver  string
424
+		want bool
425
+	}
426
+
427
+	for _, tc := range []testcase{
428
+		{
429
+			desc: "Valid Apple LLVM version string with usable version",
430
+			os:   "darwin",
431
+			ver:  "Apple LLVM version 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
432
+			want: true,
433
+		},
434
+		{
435
+			desc: "Valid Apple LLVM version string with unusable version",
436
+			os:   "darwin",
437
+			ver:  "Apple LLVM version 10.0.0 (clang-1000.11.45.5)\nOptimized build.",
438
+			want: false,
439
+		},
440
+		{
441
+			desc: "Invalid Apple LLVM version string with usable version",
442
+			os:   "darwin",
443
+			ver:  "Apple LLVM versions 11.0.3 (clang-1103.0.32.62)\nOptimized build.",
444
+			want: false,
445
+		},
446
+		{
447
+			desc: "Valid LLVM version string with usable version",
448
+			os:   "linux",
449
+			ver:  "LLVM (http://llvm.org/):\nLLVM version 9.0.1\n\nOptimized build.",
450
+			want: true,
451
+		},
452
+		{
453
+			desc: "Valid LLVM version string with unusable version",
454
+			os:   "linux",
455
+			ver:  "LLVM (http://llvm.org/):\nLLVM version 6.0.1\n\nOptimized build.",
456
+			want: false,
457
+		},
458
+		{
459
+			desc: "Invalid LLVM version string with usable version",
460
+			os:   "linux",
461
+			ver:  "LLVM (http://llvm.org/):\nLLVM versions 9.0.1\n\nOptimized build.",
462
+			want: false,
463
+		},
464
+		{
465
+			desc: "Valid LLVM objdump version string with trunk",
466
+			os:   runtime.GOOS,
467
+			ver:  "LLVM (http://llvm.org/):\nLLVM version custom-trunk 124ffeb592a00bfe\nOptimized build.",
468
+			want: true,
469
+		},
470
+		{
471
+			desc: "Invalid LLVM objdump version string with trunk",
472
+			os:   runtime.GOOS,
473
+			ver:  "LLVM (http://llvm.org/):\nLLVM version custom-trank 124ffeb592a00bfe\nOptimized build.",
474
+			want: false,
475
+		},
476
+		{
477
+			desc: "Invalid LLVM objdump version string with trunk",
478
+			os:   runtime.GOOS,
479
+			ver:  "LLVM (http://llvm.org/):\nllvm version custom-trunk 124ffeb592a00bfe\nOptimized build.",
480
+			want: false,
481
+		},
482
+	} {
483
+		if runtime.GOOS == tc.os {
484
+			if got := isLLVMObjdump(tc.ver); got != tc.want {
485
+				t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
486
+			}
487
+		}
488
+	}
489
+	for _, tc := range []testcase{
490
+		{
491
+			desc: "Valid GNU objdump version string",
492
+			ver:  "GNU objdump (GNU Binutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
493
+			want: true,
494
+		},
495
+		{
496
+			desc: "Invalid GNU objdump version string",
497
+			ver:  "GNU objdump (GNU Banutils) 2.34\nCopyright (C) 2020 Free Software Foundation, Inc.",
498
+			want: false,
499
+		},
500
+	} {
501
+		if got := isBuObjdump(tc.ver); got != tc.want {
502
+			t.Errorf("%v: got %v, want %v", tc.desc, got, tc.want)
503
+		}
504
+	}
505
+}

+ 10
- 4
internal/binutils/disasm.go Dosyayı Görüntüle

@@ -25,10 +25,11 @@ import (
25 25
 )
26 26
 
27 27
 var (
28
-	nmOutputRE            = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
29
-	objdumpAsmOutputRE    = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
30
-	objdumpOutputFileLine = regexp.MustCompile(`^(.*):([0-9]+)`)
31
-	objdumpOutputFunction = regexp.MustCompile(`^(\S.*)\(\):`)
28
+	nmOutputRE                = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
29
+	objdumpAsmOutputRE        = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
30
+	objdumpOutputFileLine     = regexp.MustCompile(`^;?\s?(.*):([0-9]+)`)
31
+	objdumpOutputFunction     = regexp.MustCompile(`^;?\s?(\S.*)\(\):`)
32
+	objdumpOutputFunctionLLVM = regexp.MustCompile(`^([[:xdigit:]]+)?\s?(.*):`)
32 33
 )
33 34
 
34 35
 func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
@@ -143,6 +144,11 @@ func disassemble(asm []byte) ([]plugin.Inst, error) {
143 144
 		if fields := objdumpOutputFunction.FindStringSubmatch(input); len(fields) == 2 {
144 145
 			function = fields[1]
145 146
 			continue
147
+		} else {
148
+			if fields := objdumpOutputFunctionLLVM.FindStringSubmatch(input); len(fields) == 3 {
149
+				function = fields[2]
150
+				continue
151
+			}
146 152
 		}
147 153
 		// Reset on unrecognized lines.
148 154
 		function, file, line = "", "", 0

+ 16
- 8
internal/binutils/disasm_test.go Dosyayı Görüntüle

@@ -110,11 +110,7 @@ func TestFunctionAssembly(t *testing.T) {
110 110
 	testcases := []testcase{
111 111
 		{
112 112
 			plugin.Sym{Name: []string{"symbol1"}, Start: 0x1000, End: 0x1FFF},
113
-			`  1000: instruction one
114
-  1001: instruction two
115
-  1002: instruction three
116
-  1003: instruction four
117
-`,
113
+			"  1000: instruction one\n  1001: instruction two\n  1002: instruction three\n  1003: instruction four",
118 114
 			[]plugin.Inst{
119 115
 				{Addr: 0x1000, Text: "instruction one"},
120 116
 				{Addr: 0x1001, Text: "instruction two"},
@@ -124,14 +120,26 @@ func TestFunctionAssembly(t *testing.T) {
124 120
 		},
125 121
 		{
126 122
 			plugin.Sym{Name: []string{"symbol2"}, Start: 0x2000, End: 0x2FFF},
127
-			`  2000: instruction one
128
-  2001: instruction two
129
-`,
123
+			"  2000: instruction one\n  2001: instruction two",
130 124
 			[]plugin.Inst{
131 125
 				{Addr: 0x2000, Text: "instruction one"},
132 126
 				{Addr: 0x2001, Text: "instruction two"},
133 127
 			},
134 128
 		},
129
+		{
130
+			plugin.Sym{Name: []string{"_main"}, Start: 0x30000, End: 0x3FFF},
131
+			"_main:\n; /tmp/hello.c:3\n30001:	push   %rbp",
132
+			[]plugin.Inst{
133
+				{Addr: 0x30001, Text: "push   %rbp", Function: "_main", File: "/tmp/hello.c", Line: 3},
134
+			},
135
+		},
136
+		{
137
+			plugin.Sym{Name: []string{"main"}, Start: 0x4000, End: 0x4FFF},
138
+			"000000000040052d <main>:\nmain():\n/tmp/hello.c:3\n40001:	push   %rbp",
139
+			[]plugin.Inst{
140
+				{Addr: 0x40001, Text: "push   %rbp", Function: "main", File: "/tmp/hello.c", Line: 3},
141
+			},
142
+		},
135 143
 	}
136 144
 
137 145
 	for _, tc := range testcases {