暂无描述

binutils.go 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package binutils provides access to the GNU binutils.
  15. package binutils
  16. import (
  17. "debug/elf"
  18. "debug/macho"
  19. "encoding/binary"
  20. "errors"
  21. "fmt"
  22. "io"
  23. "os"
  24. "os/exec"
  25. "path/filepath"
  26. "regexp"
  27. "runtime"
  28. "strconv"
  29. "strings"
  30. "sync"
  31. "github.com/google/pprof/pkg/elfexec"
  32. "github.com/google/pprof/pkg/plugin"
  33. )
  // A Binutils implements plugin.ObjTool by invoking the GNU binutils.
  //
  // The tool configuration lives in rep, which is created lazily on first use
  // and swapped for a fresh copy on every change; mu guards access to rep.
  type Binutils struct {
  	mu  sync.Mutex
  	rep *binrep
  }
  // objdumpLLVMVerRE matches the "LLVM version ..." line of an objdump
  // --version output. Submatches 1-3 capture the major, minor and patch
  // numbers; submatch 4 captures the literal "trunk" when the line contains it
  // instead of a dotted numeric version.
  var objdumpLLVMVerRE = regexp.MustCompile(`LLVM version (?:(\d*)\.(\d*)\.(\d*)|.*(trunk).*)`)
  // binrep is an immutable representation for Binutils. It is atomically
  // replaced on every mutation to provide thread-safe access.
  type binrep struct {
  	// Commands to invoke. Each xxxFound flag records whether initTools
  	// located a binary for the corresponding tool.
  	llvmSymbolizer      string
  	llvmSymbolizerFound bool
  	addr2line           string
  	addr2lineFound      bool
  	nm                  string
  	nmFound             bool
  	objdump             string
  	objdumpFound        bool
  	// isLLVMObjdump is true when objdump was identified as an LLVM objdump
  	// rather than a GNU one; Disasm uses it to pick the Intel-syntax flag.
  	isLLVMObjdump bool

  	// if fast, perform symbolization using nm (symbol names only),
  	// instead of file-line detail from the slower addr2line.
  	fast bool
  }
  57. // get returns the current representation for bu, initializing it if necessary.
  58. func (bu *Binutils) get() *binrep {
  59. bu.mu.Lock()
  60. r := bu.rep
  61. if r == nil {
  62. r = &binrep{}
  63. initTools(r, "")
  64. bu.rep = r
  65. }
  66. bu.mu.Unlock()
  67. return r
  68. }
  69. // update modifies the rep for bu via the supplied function.
  70. func (bu *Binutils) update(fn func(r *binrep)) {
  71. r := &binrep{}
  72. bu.mu.Lock()
  73. defer bu.mu.Unlock()
  74. if bu.rep == nil {
  75. initTools(r, "")
  76. } else {
  77. *r = *bu.rep
  78. }
  79. fn(r)
  80. bu.rep = r
  81. }
  82. // String returns string representation of the binutils state for debug logging.
  83. func (bu *Binutils) String() string {
  84. r := bu.get()
  85. var llvmSymbolizer, addr2line, nm, objdump string
  86. if r.llvmSymbolizerFound {
  87. llvmSymbolizer = r.llvmSymbolizer
  88. }
  89. if r.addr2lineFound {
  90. addr2line = r.addr2line
  91. }
  92. if r.nmFound {
  93. nm = r.nm
  94. }
  95. if r.objdumpFound {
  96. objdump = r.objdump
  97. }
  98. return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
  99. llvmSymbolizer, addr2line, nm, objdump, r.fast)
  100. }
  101. // SetFastSymbolization sets a toggle that makes binutils use fast
  102. // symbolization (using nm), which is much faster than addr2line but
  103. // provides only symbol name information (no file/line).
  104. func (bu *Binutils) SetFastSymbolization(fast bool) {
  105. bu.update(func(r *binrep) { r.fast = fast })
  106. }
  107. // SetTools processes the contents of the tools option. It
  108. // expects a set of entries separated by commas; each entry is a pair
  109. // of the form t:path, where cmd will be used to look only for the
  110. // tool named t. If t is not specified, the path is searched for all
  111. // tools.
  112. func (bu *Binutils) SetTools(config string) {
  113. bu.update(func(r *binrep) { initTools(r, config) })
  114. }
  115. func initTools(b *binrep, config string) {
  116. // paths collect paths per tool; Key "" contains the default.
  117. paths := make(map[string][]string)
  118. for _, t := range strings.Split(config, ",") {
  119. name, path := "", t
  120. if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
  121. name, path = ct[0], ct[1]
  122. }
  123. paths[name] = append(paths[name], path)
  124. }
  125. defaultPath := paths[""]
  126. b.llvmSymbolizer, b.llvmSymbolizerFound = chooseExe([]string{"llvm-symbolizer"}, []string{}, append(paths["llvm-symbolizer"], defaultPath...))
  127. b.addr2line, b.addr2lineFound = chooseExe([]string{"addr2line"}, []string{"gaddr2line"}, append(paths["addr2line"], defaultPath...))
  128. // The "-n" option is supported by LLVM since 2011. The output of llvm-nm
  129. // and GNU nm with "-n" option is interchangeable for our purposes, so we do
  130. // not need to differrentiate them.
  131. b.nm, b.nmFound = chooseExe([]string{"llvm-nm", "nm"}, []string{"gnm"}, append(paths["nm"], defaultPath...))
  132. b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...))
  133. }
  134. // findObjdump finds and returns path to preferred objdump binary.
  135. // Order of preference is: llvm-objdump, objdump.
  136. // On MacOS only, also looks for gobjdump with least preference.
  137. // Accepts a list of paths and returns:
  138. // a string with path to the preferred objdump binary if found,
  139. // or an empty string if not found;
  140. // a boolean if any acceptable objdump was found;
  141. // a boolean indicating if it is an LLVM objdump.
  142. func findObjdump(paths []string) (string, bool, bool) {
  143. objdumpNames := []string{"llvm-objdump", "objdump"}
  144. if runtime.GOOS == "darwin" {
  145. objdumpNames = append(objdumpNames, "gobjdump")
  146. }
  147. for _, objdumpName := range objdumpNames {
  148. if objdump, objdumpFound := findExe(objdumpName, paths); objdumpFound {
  149. cmdOut, err := exec.Command(objdump, "--version").Output()
  150. if err != nil {
  151. continue
  152. }
  153. if isLLVMObjdump(string(cmdOut)) {
  154. return objdump, true, true
  155. }
  156. if isBuObjdump(string(cmdOut)) {
  157. return objdump, true, false
  158. }
  159. }
  160. }
  161. return "", false, false
  162. }
  163. // chooseExe finds and returns path to preferred binary. names is a list of
  164. // names to search on both Linux and OSX. osxNames is a list of names specific
  165. // to OSX. names always has a higher priority than osxNames. The order of
  166. // the name within each list decides its priority (e.g. the first name has a
  167. // higher priority than the second name in the list).
  168. //
  169. // It returns a string with path to the binary and a boolean indicating if any
  170. // acceptable binary was found.
  171. func chooseExe(names, osxNames []string, paths []string) (string, bool) {
  172. if runtime.GOOS == "darwin" {
  173. names = append(names, osxNames...)
  174. }
  175. for _, name := range names {
  176. if binary, found := findExe(name, paths); found {
  177. return binary, true
  178. }
  179. }
  180. return "", false
  181. }
  182. // isLLVMObjdump accepts a string with path to an objdump binary,
  183. // and returns a boolean indicating if the given binary is an LLVM
  184. // objdump binary of an acceptable version.
  185. func isLLVMObjdump(output string) bool {
  186. fields := objdumpLLVMVerRE.FindStringSubmatch(output)
  187. if len(fields) != 5 {
  188. return false
  189. }
  190. if fields[4] == "trunk" {
  191. return true
  192. }
  193. verMajor, err := strconv.Atoi(fields[1])
  194. if err != nil {
  195. return false
  196. }
  197. verPatch, err := strconv.Atoi(fields[3])
  198. if err != nil {
  199. return false
  200. }
  201. if runtime.GOOS == "linux" && verMajor >= 8 {
  202. // Ensure LLVM objdump is at least version 8.0 on Linux.
  203. // Some flags, like --demangle, and double dashes for options are
  204. // not supported by previous versions.
  205. return true
  206. }
  207. if runtime.GOOS == "darwin" {
  208. // Ensure LLVM objdump is at least version 10.0.1 on MacOS.
  209. return verMajor > 10 || (verMajor == 10 && verPatch >= 1)
  210. }
  211. return false
  212. }
  // isBuObjdump accepts the output of running an objdump binary with
  // --version and reports whether it identifies a GNU binutils objdump.
  // No version check is performed.
  func isBuObjdump(output string) bool {
  	const gnuBanner = "GNU objdump"
  	return strings.Contains(output, gnuBanner)
  }
  // findExe looks for the executable cmd in each of paths, in order, and
  // returns the first location where it is found together with true. When it
  // is found in none of them, it returns cmd unchanged and false.
  func findExe(cmd string, paths []string) (string, bool) {
  	for i := range paths {
  		candidate := filepath.Join(paths[i], cmd)
  		resolved, err := exec.LookPath(candidate)
  		if err != nil {
  			continue
  		}
  		return resolved, true
  	}
  	return cmd, false
  }
  230. // Disasm returns the assembly instructions for the specified address range
  231. // of a binary.
  232. func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
  233. b := bu.get()
  234. if !b.objdumpFound {
  235. return nil, errors.New("cannot disasm: no objdump tool available")
  236. }
  237. args := []string{"--disassemble-all", "--demangle", "--no-show-raw-insn",
  238. "--line-numbers", fmt.Sprintf("--start-address=%#x", start),
  239. fmt.Sprintf("--stop-address=%#x", end)}
  240. if intelSyntax {
  241. if b.isLLVMObjdump {
  242. args = append(args, "--x86-asm-syntax=intel")
  243. } else {
  244. args = append(args, "-M", "intel")
  245. }
  246. }
  247. args = append(args, file)
  248. cmd := exec.Command(b.objdump, args...)
  249. out, err := cmd.Output()
  250. if err != nil {
  251. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  252. }
  253. return disassemble(out)
  254. }
  255. // Open satisfies the plugin.ObjTool interface.
  256. func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  257. b := bu.get()
  258. // Make sure file is a supported executable.
  259. // This uses magic numbers, mainly to provide better error messages but
  260. // it should also help speed.
  261. if _, err := os.Stat(name); err != nil {
  262. // For testing, do not require file name to exist.
  263. if strings.Contains(b.addr2line, "testdata/") {
  264. return &fileAddr2Line{file: file{b: b, name: name}}, nil
  265. }
  266. return nil, err
  267. }
  268. // Read the first 4 bytes of the file.
  269. f, err := os.Open(name)
  270. if err != nil {
  271. return nil, fmt.Errorf("error opening %s: %v", name, err)
  272. }
  273. defer f.Close()
  274. var header [4]byte
  275. if _, err = io.ReadFull(f, header[:]); err != nil {
  276. return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
  277. }
  278. elfMagic := string(header[:])
  279. // Match against supported file types.
  280. if elfMagic == elf.ELFMAG {
  281. f, err := b.openELF(name, start, limit, offset)
  282. if err != nil {
  283. return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
  284. }
  285. return f, nil
  286. }
  287. // Mach-O magic numbers can be big or little endian.
  288. machoMagicLittle := binary.LittleEndian.Uint32(header[:])
  289. machoMagicBig := binary.BigEndian.Uint32(header[:])
  290. if machoMagicLittle == macho.Magic32 || machoMagicLittle == macho.Magic64 ||
  291. machoMagicBig == macho.Magic32 || machoMagicBig == macho.Magic64 {
  292. f, err := b.openMachO(name, start, limit, offset)
  293. if err != nil {
  294. return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
  295. }
  296. return f, nil
  297. }
  298. if machoMagicLittle == macho.MagicFat || machoMagicBig == macho.MagicFat {
  299. f, err := b.openFatMachO(name, start, limit, offset)
  300. if err != nil {
  301. return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
  302. }
  303. return f, nil
  304. }
  305. return nil, fmt.Errorf("unrecognized binary format: %s", name)
  306. }
  307. func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
  308. // Subtract the load address of the __TEXT section. Usually 0 for shared
  309. // libraries or 0x100000000 for executables. You can check this value by
  310. // running `objdump -private-headers <file>`.
  311. textSegment := of.Segment("__TEXT")
  312. if textSegment == nil {
  313. return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
  314. }
  315. if textSegment.Addr > start {
  316. return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
  317. name, textSegment.Addr, start)
  318. }
  319. base := start - textSegment.Addr
  320. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  321. return &fileNM{file: file{b: b, name: name, base: base}}, nil
  322. }
  323. return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
  324. }
  325. func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  326. of, err := macho.OpenFat(name)
  327. if err != nil {
  328. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  329. }
  330. defer of.Close()
  331. if len(of.Arches) == 0 {
  332. return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
  333. }
  334. var arch macho.Cpu
  335. // Use the host architecture.
  336. // TODO: This is not ideal because the host architecture may not be the one
  337. // that was profiled. E.g. an amd64 host can profile a 386 program.
  338. switch runtime.GOARCH {
  339. case "386":
  340. arch = macho.Cpu386
  341. case "amd64", "amd64p32":
  342. arch = macho.CpuAmd64
  343. case "arm", "armbe", "arm64", "arm64be":
  344. arch = macho.CpuArm
  345. case "ppc":
  346. arch = macho.CpuPpc
  347. case "ppc64", "ppc64le":
  348. arch = macho.CpuPpc64
  349. default:
  350. return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
  351. }
  352. for i := range of.Arches {
  353. if of.Arches[i].Cpu == arch {
  354. return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
  355. }
  356. }
  357. return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
  358. }
  359. func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  360. of, err := macho.Open(name)
  361. if err != nil {
  362. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  363. }
  364. defer of.Close()
  365. return b.openMachOCommon(name, of, start, limit, offset)
  366. }
  367. func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  368. ef, err := elf.Open(name)
  369. if err != nil {
  370. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  371. }
  372. defer ef.Close()
  373. var stextOffset *uint64
  374. var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
  375. if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
  376. // Reading all Symbols is expensive, and we only rarely need it so
  377. // we don't want to do it every time. But if _stext happens to be
  378. // page-aligned but isn't the same as Vaddr, we would symbolize
  379. // wrong. So if the name the addresses aren't page aligned, or if
  380. // the name is "vmlinux" we read _stext. We can be wrong if: (1)
  381. // someone passes a kernel path that doesn't contain "vmlinux" AND
  382. // (2) _stext is page-aligned AND (3) _stext is not at Vaddr
  383. symbols, err := ef.Symbols()
  384. if err != nil && err != elf.ErrNoSymbols {
  385. return nil, err
  386. }
  387. for _, s := range symbols {
  388. if s.Name == "_stext" {
  389. // The kernel may use _stext as the mapping start address.
  390. stextOffset = &s.Value
  391. break
  392. }
  393. }
  394. }
  395. base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
  396. if err != nil {
  397. return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
  398. }
  399. buildID := ""
  400. if f, err := os.Open(name); err == nil {
  401. if id, err := elfexec.GetBuildID(f); err == nil {
  402. buildID = fmt.Sprintf("%x", id)
  403. }
  404. }
  405. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  406. return &fileNM{file: file{b, name, base, buildID}}, nil
  407. }
  408. return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
  409. }
  // file implements the plugin.ObjFile interface. It carries the state shared
  // by fileNM and fileAddr2Line, which embed it and override SourceLine.
  type file struct {
  	b       *binrep // tool configuration captured when the file was opened
  	name    string  // path of the object file
  	base    uint64  // base address computed for the mapping
  	buildID string  // hex build ID; empty when none was read
  }
  417. func (f *file) Name() string {
  418. return f.name
  419. }
  420. func (f *file) Base() uint64 {
  421. return f.base
  422. }
  423. func (f *file) BuildID() string {
  424. return f.buildID
  425. }
  426. func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
  427. return []plugin.Frame{}, nil
  428. }
  429. func (f *file) Close() error {
  430. return nil
  431. }
  432. func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
  433. // Get from nm a list of symbols sorted by address.
  434. cmd := exec.Command(f.b.nm, "-n", f.name)
  435. out, err := cmd.Output()
  436. if err != nil {
  437. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  438. }
  439. return findSymbols(out, f.name, r, addr)
  440. }
  // fileNM implements the plugin.ObjFile interface, using 'nm' to map
  // addresses to symbols (without file/line number information). It is
  // faster than fileAddr2Line.
  type fileNM struct {
  	file
  	// addr2linernm is created on the first SourceLine call and reused by
  	// later calls.
  	addr2linernm *addr2LinerNM
  }
  448. func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
  449. if f.addr2linernm == nil {
  450. addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
  451. if err != nil {
  452. return nil, err
  453. }
  454. f.addr2linernm = addr2liner
  455. }
  456. return f.addr2linernm.addrInfo(addr)
  457. }
  // fileAddr2Line implements the plugin.ObjFile interface, using
  // llvm-symbolizer, if that's available, or addr2line to map addresses to
  // symbols (with file/line number information). It can be slow for large
  // binaries with debug information.
  type fileAddr2Line struct {
  	// once ensures init runs a single time, on the first SourceLine call.
  	once sync.Once
  	file
  	// init sets at most one of the two symbolizers below; both stay nil if
  	// neither tool could be started.
  	addr2liner     *addr2Liner
  	llvmSymbolizer *llvmSymbolizer
  }
  468. func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
  469. f.once.Do(f.init)
  470. if f.llvmSymbolizer != nil {
  471. return f.llvmSymbolizer.addrInfo(addr)
  472. }
  473. if f.addr2liner != nil {
  474. return f.addr2liner.addrInfo(addr)
  475. }
  476. return nil, fmt.Errorf("could not find local addr2liner")
  477. }
  478. func (f *fileAddr2Line) init() {
  479. if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
  480. f.llvmSymbolizer = llvmSymbolizer
  481. return
  482. }
  483. if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
  484. f.addr2liner = addr2liner
  485. // When addr2line encounters some gcc compiled binaries, it
  486. // drops interesting parts of names in anonymous namespaces.
  487. // Fallback to NM for better function names.
  488. if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
  489. f.addr2liner.nm = nm
  490. }
  491. }
  492. }
  493. func (f *fileAddr2Line) Close() error {
  494. if f.llvmSymbolizer != nil {
  495. f.llvmSymbolizer.rw.close()
  496. f.llvmSymbolizer = nil
  497. }
  498. if f.addr2liner != nil {
  499. f.addr2liner.rw.close()
  500. f.addr2liner = nil
  501. }
  502. return nil
  503. }