Без опису

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package binutils provides access to the GNU binutils.
  15. package binutils
  16. import (
  17. "debug/elf"
  18. "debug/macho"
  19. "encoding/binary"
  20. "errors"
  21. "fmt"
  22. "io"
  23. "os"
  24. "os/exec"
  25. "path/filepath"
  26. "regexp"
  27. "runtime"
  28. "strconv"
  29. "strings"
  30. "sync"
  31. "github.com/google/pprof/internal/elfexec"
  32. "github.com/google/pprof/internal/plugin"
  33. )
  34. // A Binutils implements plugin.ObjTool by invoking the GNU binutils.
  35. type Binutils struct {
  36. mu sync.Mutex
  37. rep *binrep
  38. }
  39. var objdumpLLVMVerRE = regexp.MustCompile(`LLVM version (?:(\d*)\.(\d*)\.(\d*)|.*(trunk).*)`)
  40. // binrep is an immutable representation for Binutils. It is atomically
  41. // replaced on every mutation to provide thread-safe access.
  42. type binrep struct {
  43. // Commands to invoke.
  44. llvmSymbolizer string
  45. llvmSymbolizerFound bool
  46. addr2line string
  47. addr2lineFound bool
  48. nm string
  49. nmFound bool
  50. objdump string
  51. objdumpFound bool
  52. isLLVMObjdump bool
  53. // if fast, perform symbolization using nm (symbol names only),
  54. // instead of file-line detail from the slower addr2line.
  55. fast bool
  56. }
  57. // get returns the current representation for bu, initializing it if necessary.
  58. func (bu *Binutils) get() *binrep {
  59. bu.mu.Lock()
  60. r := bu.rep
  61. if r == nil {
  62. r = &binrep{}
  63. initTools(r, "")
  64. bu.rep = r
  65. }
  66. bu.mu.Unlock()
  67. return r
  68. }
  69. // update modifies the rep for bu via the supplied function.
  70. func (bu *Binutils) update(fn func(r *binrep)) {
  71. r := &binrep{}
  72. bu.mu.Lock()
  73. defer bu.mu.Unlock()
  74. if bu.rep == nil {
  75. initTools(r, "")
  76. } else {
  77. *r = *bu.rep
  78. }
  79. fn(r)
  80. bu.rep = r
  81. }
  82. // String returns string representation of the binutils state for debug logging.
  83. func (bu *Binutils) String() string {
  84. r := bu.get()
  85. var llvmSymbolizer, addr2line, nm, objdump string
  86. if r.llvmSymbolizerFound {
  87. llvmSymbolizer = r.llvmSymbolizer
  88. }
  89. if r.addr2lineFound {
  90. addr2line = r.addr2line
  91. }
  92. if r.nmFound {
  93. nm = r.nm
  94. }
  95. if r.objdumpFound {
  96. objdump = r.objdump
  97. }
  98. return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
  99. llvmSymbolizer, addr2line, nm, objdump, r.fast)
  100. }
  101. // SetFastSymbolization sets a toggle that makes binutils use fast
  102. // symbolization (using nm), which is much faster than addr2line but
  103. // provides only symbol name information (no file/line).
  104. func (bu *Binutils) SetFastSymbolization(fast bool) {
  105. bu.update(func(r *binrep) { r.fast = fast })
  106. }
  107. // SetTools processes the contents of the tools option. It
  108. // expects a set of entries separated by commas; each entry is a pair
  109. // of the form t:path, where cmd will be used to look only for the
  110. // tool named t. If t is not specified, the path is searched for all
  111. // tools.
  112. func (bu *Binutils) SetTools(config string) {
  113. bu.update(func(r *binrep) { initTools(r, config) })
  114. }
  115. func initTools(b *binrep, config string) {
  116. // paths collect paths per tool; Key "" contains the default.
  117. paths := make(map[string][]string)
  118. for _, t := range strings.Split(config, ",") {
  119. name, path := "", t
  120. if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
  121. name, path = ct[0], ct[1]
  122. }
  123. paths[name] = append(paths[name], path)
  124. }
  125. defaultPath := paths[""]
  126. b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
  127. b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
  128. if !b.addr2lineFound {
  129. // On MacOS, brew installs addr2line under gaddr2line name, so search for
  130. // that if the tool is not found by its default name.
  131. b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
  132. }
  133. b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
  134. b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...))
  135. }
  136. // findObjdump finds and returns path to preferred objdump binary.
  137. // Order of preference is: llvm-objdump, objdump.
  138. // On MacOS only, also looks for gobjdump with least preference.
  139. // Accepts a list of paths and returns:
  140. // a string with path to the preferred objdump binary if found,
  141. // or an empty string if not found;
  142. // a boolean if any acceptable objdump was found;
  143. // a boolen indicating if it is an LLVM objdump.
  144. func findObjdump(paths []string) (string, bool, bool) {
  145. objdumpNames := []string{"llvm-objdump", "objdump"}
  146. if runtime.GOOS == "darwin" {
  147. objdumpNames = append(objdumpNames, "gobjdump")
  148. }
  149. for _, objdumpName := range objdumpNames {
  150. if objdump, objdumpFound := findExe(objdumpName, paths); objdumpFound {
  151. cmdOut, err := exec.Command(objdump, "--version").Output()
  152. if err != nil {
  153. continue
  154. }
  155. if isLLVMObjdump(string(cmdOut)) {
  156. return objdump, true, true
  157. }
  158. if isBuObjdump(string(cmdOut)) {
  159. return objdump, true, false
  160. }
  161. }
  162. }
  163. return "", false, false
  164. }
  165. // isLLVMObjdump accepts a string with path to an objdump binary,
  166. // and returns a boolean indicating if the given binary is an LLVM
  167. // objdump binary of an acceptable version.
  168. func isLLVMObjdump(output string) bool {
  169. fields := objdumpLLVMVerRE.FindStringSubmatch(output)
  170. if len(fields) != 5 {
  171. return false
  172. }
  173. if fields[4] == "trunk" {
  174. return true
  175. }
  176. verMajor, err := strconv.Atoi(fields[1])
  177. if err != nil {
  178. return false
  179. }
  180. verPatch, err := strconv.Atoi(fields[3])
  181. if err != nil {
  182. return false
  183. }
  184. if runtime.GOOS == "linux" && verMajor >= 8 {
  185. // Ensure LLVM objdump is at least version 8.0 on Linux.
  186. // Some flags, like --demangle, and double dashes for options are
  187. // not supported by previous versions.
  188. return true
  189. }
  190. if runtime.GOOS == "darwin" {
  191. // Ensure LLVM objdump is at least version 10.0.1 on MacOS.
  192. return verMajor > 10 || (verMajor == 10 && verPatch >= 1)
  193. }
  194. return false
  195. }
  196. // isBuObjdump accepts a string with path to an objdump binary,
  197. // and returns a boolean indicating if the given binary is a GNU
  198. // binutils objdump binary. No version check is performed.
  199. func isBuObjdump(output string) bool {
  200. return strings.Contains(output, "GNU objdump") && strings.Contains(output, "Binutils")
  201. }
  202. // findExe looks for an executable command on a set of paths.
  203. // If it cannot find it, returns cmd.
  204. func findExe(cmd string, paths []string) (string, bool) {
  205. for _, p := range paths {
  206. cp := filepath.Join(p, cmd)
  207. if c, err := exec.LookPath(cp); err == nil {
  208. return c, true
  209. }
  210. }
  211. return cmd, false
  212. }
  213. // Disasm returns the assembly instructions for the specified address range
  214. // of a binary.
  215. func (bu *Binutils) Disasm(file string, start, end uint64, intelSyntax bool) ([]plugin.Inst, error) {
  216. b := bu.get()
  217. if !b.objdumpFound {
  218. return nil, errors.New("cannot disasm: no objdump tool available")
  219. }
  220. args := []string{"--disassemble-all", "--demangle", "--no-show-raw-insn",
  221. "--line-numbers", fmt.Sprintf("--start-address=%#x", start),
  222. fmt.Sprintf("--stop-address=%#x", end)}
  223. if intelSyntax {
  224. if b.isLLVMObjdump {
  225. args = append(args, "--x86-asm-syntax=intel")
  226. } else {
  227. args = append(args, "-M", "intel")
  228. }
  229. }
  230. args = append(args, file)
  231. cmd := exec.Command(b.objdump, args...)
  232. out, err := cmd.Output()
  233. if err != nil {
  234. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  235. }
  236. return disassemble(out)
  237. }
  238. // Open satisfies the plugin.ObjTool interface.
  239. func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  240. b := bu.get()
  241. // Make sure file is a supported executable.
  242. // This uses magic numbers, mainly to provide better error messages but
  243. // it should also help speed.
  244. if _, err := os.Stat(name); err != nil {
  245. // For testing, do not require file name to exist.
  246. if strings.Contains(b.addr2line, "testdata/") {
  247. return &fileAddr2Line{file: file{b: b, name: name}}, nil
  248. }
  249. return nil, err
  250. }
  251. // Read the first 4 bytes of the file.
  252. f, err := os.Open(name)
  253. if err != nil {
  254. return nil, fmt.Errorf("error opening %s: %v", name, err)
  255. }
  256. defer f.Close()
  257. var header [4]byte
  258. if _, err = io.ReadFull(f, header[:]); err != nil {
  259. return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
  260. }
  261. elfMagic := string(header[:])
  262. // Match against supported file types.
  263. if elfMagic == elf.ELFMAG {
  264. f, err := b.openELF(name, start, limit, offset)
  265. if err != nil {
  266. return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
  267. }
  268. return f, nil
  269. }
  270. // Mach-O magic numbers can be big or little endian.
  271. machoMagicLittle := binary.LittleEndian.Uint32(header[:])
  272. machoMagicBig := binary.BigEndian.Uint32(header[:])
  273. if machoMagicLittle == macho.Magic32 || machoMagicLittle == macho.Magic64 ||
  274. machoMagicBig == macho.Magic32 || machoMagicBig == macho.Magic64 {
  275. f, err := b.openMachO(name, start, limit, offset)
  276. if err != nil {
  277. return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
  278. }
  279. return f, nil
  280. }
  281. if machoMagicLittle == macho.MagicFat || machoMagicBig == macho.MagicFat {
  282. f, err := b.openFatMachO(name, start, limit, offset)
  283. if err != nil {
  284. return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
  285. }
  286. return f, nil
  287. }
  288. return nil, fmt.Errorf("unrecognized binary format: %s", name)
  289. }
  290. func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
  291. // Subtract the load address of the __TEXT section. Usually 0 for shared
  292. // libraries or 0x100000000 for executables. You can check this value by
  293. // running `objdump -private-headers <file>`.
  294. textSegment := of.Segment("__TEXT")
  295. if textSegment == nil {
  296. return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
  297. }
  298. if textSegment.Addr > start {
  299. return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
  300. name, textSegment.Addr, start)
  301. }
  302. base := start - textSegment.Addr
  303. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  304. return &fileNM{file: file{b: b, name: name, base: base}}, nil
  305. }
  306. return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
  307. }
  308. func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  309. of, err := macho.OpenFat(name)
  310. if err != nil {
  311. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  312. }
  313. defer of.Close()
  314. if len(of.Arches) == 0 {
  315. return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
  316. }
  317. var arch macho.Cpu
  318. // Use the host architecture.
  319. // TODO: This is not ideal because the host architecture may not be the one
  320. // that was profiled. E.g. an amd64 host can profile a 386 program.
  321. switch runtime.GOARCH {
  322. case "386":
  323. arch = macho.Cpu386
  324. case "amd64", "amd64p32":
  325. arch = macho.CpuAmd64
  326. case "arm", "armbe", "arm64", "arm64be":
  327. arch = macho.CpuArm
  328. case "ppc":
  329. arch = macho.CpuPpc
  330. case "ppc64", "ppc64le":
  331. arch = macho.CpuPpc64
  332. default:
  333. return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
  334. }
  335. for i := range of.Arches {
  336. if of.Arches[i].Cpu == arch {
  337. return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
  338. }
  339. }
  340. return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
  341. }
  342. func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  343. of, err := macho.Open(name)
  344. if err != nil {
  345. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  346. }
  347. defer of.Close()
  348. return b.openMachOCommon(name, of, start, limit, offset)
  349. }
  350. func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  351. ef, err := elf.Open(name)
  352. if err != nil {
  353. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  354. }
  355. defer ef.Close()
  356. var stextOffset *uint64
  357. var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
  358. if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
  359. // Reading all Symbols is expensive, and we only rarely need it so
  360. // we don't want to do it every time. But if _stext happens to be
  361. // page-aligned but isn't the same as Vaddr, we would symbolize
  362. // wrong. So if the name the addresses aren't page aligned, or if
  363. // the name is "vmlinux" we read _stext. We can be wrong if: (1)
  364. // someone passes a kernel path that doesn't contain "vmlinux" AND
  365. // (2) _stext is page-aligned AND (3) _stext is not at Vaddr
  366. symbols, err := ef.Symbols()
  367. if err != nil && err != elf.ErrNoSymbols {
  368. return nil, err
  369. }
  370. for _, s := range symbols {
  371. if s.Name == "_stext" {
  372. // The kernel may use _stext as the mapping start address.
  373. stextOffset = &s.Value
  374. break
  375. }
  376. }
  377. }
  378. base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
  379. if err != nil {
  380. return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
  381. }
  382. buildID := ""
  383. if f, err := os.Open(name); err == nil {
  384. if id, err := elfexec.GetBuildID(f); err == nil {
  385. buildID = fmt.Sprintf("%x", id)
  386. }
  387. }
  388. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  389. return &fileNM{file: file{b, name, base, buildID}}, nil
  390. }
  391. return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
  392. }
  393. // file implements the binutils.ObjFile interface.
  394. type file struct {
  395. b *binrep
  396. name string
  397. base uint64
  398. buildID string
  399. }
  400. func (f *file) Name() string {
  401. return f.name
  402. }
  403. func (f *file) Base() uint64 {
  404. return f.base
  405. }
  406. func (f *file) BuildID() string {
  407. return f.buildID
  408. }
  409. func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
  410. return []plugin.Frame{}, nil
  411. }
  412. func (f *file) Close() error {
  413. return nil
  414. }
  415. func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
  416. // Get from nm a list of symbols sorted by address.
  417. cmd := exec.Command(f.b.nm, "-n", f.name)
  418. out, err := cmd.Output()
  419. if err != nil {
  420. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  421. }
  422. return findSymbols(out, f.name, r, addr)
  423. }
  424. // fileNM implements the binutils.ObjFile interface, using 'nm' to map
  425. // addresses to symbols (without file/line number information). It is
  426. // faster than fileAddr2Line.
  427. type fileNM struct {
  428. file
  429. addr2linernm *addr2LinerNM
  430. }
  431. func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
  432. if f.addr2linernm == nil {
  433. addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
  434. if err != nil {
  435. return nil, err
  436. }
  437. f.addr2linernm = addr2liner
  438. }
  439. return f.addr2linernm.addrInfo(addr)
  440. }
  441. // fileAddr2Line implements the binutils.ObjFile interface, using
  442. // llvm-symbolizer, if that's available, or addr2line to map addresses to
  443. // symbols (with file/line number information). It can be slow for large
  444. // binaries with debug information.
  445. type fileAddr2Line struct {
  446. once sync.Once
  447. file
  448. addr2liner *addr2Liner
  449. llvmSymbolizer *llvmSymbolizer
  450. }
  451. func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
  452. f.once.Do(f.init)
  453. if f.llvmSymbolizer != nil {
  454. return f.llvmSymbolizer.addrInfo(addr)
  455. }
  456. if f.addr2liner != nil {
  457. return f.addr2liner.addrInfo(addr)
  458. }
  459. return nil, fmt.Errorf("could not find local addr2liner")
  460. }
  461. func (f *fileAddr2Line) init() {
  462. if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
  463. f.llvmSymbolizer = llvmSymbolizer
  464. return
  465. }
  466. if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
  467. f.addr2liner = addr2liner
  468. // When addr2line encounters some gcc compiled binaries, it
  469. // drops interesting parts of names in anonymous namespaces.
  470. // Fallback to NM for better function names.
  471. if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
  472. f.addr2liner.nm = nm
  473. }
  474. }
  475. }
  476. func (f *fileAddr2Line) Close() error {
  477. if f.llvmSymbolizer != nil {
  478. f.llvmSymbolizer.rw.close()
  479. f.llvmSymbolizer = nil
  480. }
  481. if f.addr2liner != nil {
  482. f.addr2liner.rw.close()
  483. f.addr2liner = nil
  484. }
  485. return nil
  486. }