暂无描述

binutils.go 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Package binutils provides access to the GNU binutils.
  15. package binutils
  16. import (
  17. "debug/elf"
  18. "debug/macho"
  19. "fmt"
  20. "os"
  21. "os/exec"
  22. "path/filepath"
  23. "regexp"
  24. "strings"
  25. "sync"
  26. "github.com/google/pprof/internal/elfexec"
  27. "github.com/google/pprof/internal/plugin"
  28. )
  29. // A Binutils implements plugin.ObjTool by invoking the GNU binutils.
  30. type Binutils struct {
  31. mu sync.Mutex
  32. rep *binrep
  33. }
  // binrep holds the tool configuration used by Binutils. A binrep is never
  // modified once published: every change builds a new value that replaces
  // the old one, which is what makes concurrent readers safe.
  type binrep struct {
  	// Command to run for each tool, paired with a flag recording whether
  	// the command was actually located.
  	llvmSymbolizer      string
  	llvmSymbolizerFound bool
  	addr2line           string
  	addr2lineFound      bool
  	nm                  string
  	nmFound             bool
  	objdump             string
  	objdumpFound        bool

  	// fast selects symbolization through nm (symbol names only) rather
  	// than the slower addr2line, which also reports file and line.
  	fast bool
  }
  50. // get returns the current representation for bu, initializing it if necessary.
  51. func (bu *Binutils) get() *binrep {
  52. bu.mu.Lock()
  53. r := bu.rep
  54. if r == nil {
  55. r = &binrep{}
  56. initTools(r, "")
  57. bu.rep = r
  58. }
  59. bu.mu.Unlock()
  60. return r
  61. }
  62. // update modifies the rep for bu via the supplied function.
  63. func (bu *Binutils) update(fn func(r *binrep)) {
  64. r := &binrep{}
  65. bu.mu.Lock()
  66. defer bu.mu.Unlock()
  67. if bu.rep == nil {
  68. initTools(r, "")
  69. } else {
  70. *r = *bu.rep
  71. }
  72. fn(r)
  73. bu.rep = r
  74. }
  75. // String returns string representation of the binutils state for debug logging.
  76. func (bu *Binutils) String() string {
  77. r := bu.get()
  78. var llvmSymbolizer, addr2line, nm, objdump string
  79. if r.llvmSymbolizerFound {
  80. llvmSymbolizer = r.llvmSymbolizer
  81. }
  82. if r.addr2lineFound {
  83. addr2line = r.addr2line
  84. }
  85. if r.nmFound {
  86. nm = r.nm
  87. }
  88. if r.objdumpFound {
  89. objdump = r.objdump
  90. }
  91. return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
  92. llvmSymbolizer, addr2line, nm, objdump, r.fast)
  93. }
  94. // SetFastSymbolization sets a toggle that makes binutils use fast
  95. // symbolization (using nm), which is much faster than addr2line but
  96. // provides only symbol name information (no file/line).
  97. func (bu *Binutils) SetFastSymbolization(fast bool) {
  98. bu.update(func(r *binrep) { r.fast = fast })
  99. }
  100. // SetTools processes the contents of the tools option. It
  101. // expects a set of entries separated by commas; each entry is a pair
  102. // of the form t:path, where cmd will be used to look only for the
  103. // tool named t. If t is not specified, the path is searched for all
  104. // tools.
  105. func (bu *Binutils) SetTools(config string) {
  106. bu.update(func(r *binrep) { initTools(r, config) })
  107. }
  108. func initTools(b *binrep, config string) {
  109. // paths collect paths per tool; Key "" contains the default.
  110. paths := make(map[string][]string)
  111. for _, t := range strings.Split(config, ",") {
  112. name, path := "", t
  113. if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
  114. name, path = ct[0], ct[1]
  115. }
  116. paths[name] = append(paths[name], path)
  117. }
  118. defaultPath := paths[""]
  119. b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
  120. b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
  121. if !b.addr2lineFound {
  122. // On MacOS, brew installs addr2line under gaddr2line name, so search for
  123. // that if the tool is not found by its default name.
  124. b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
  125. }
  126. b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
  127. b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
  128. }
  // findExe searches each entry of paths, in order, for the executable cmd.
  // It returns the first location that resolves together with true. When no
  // entry resolves, it returns cmd unchanged together with false.
  func findExe(cmd string, paths []string) (string, bool) {
  	for i := range paths {
  		candidate := filepath.Join(paths[i], cmd)
  		resolved, err := exec.LookPath(candidate)
  		if err != nil {
  			continue
  		}
  		return resolved, true
  	}
  	return cmd, false
  }
  140. // Disasm returns the assembly instructions for the specified address range
  141. // of a binary.
  142. func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
  143. b := bu.get()
  144. cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
  145. fmt.Sprintf("--start-address=%#x", start),
  146. fmt.Sprintf("--stop-address=%#x", end),
  147. file)
  148. out, err := cmd.Output()
  149. if err != nil {
  150. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  151. }
  152. return disassemble(out)
  153. }
  154. // Open satisfies the plugin.ObjTool interface.
  155. func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  156. b := bu.get()
  157. // Make sure file is a supported executable.
  158. // The pprof driver uses Open to sniff the difference
  159. // between an executable and a profile.
  160. // For now, only ELF is supported.
  161. // Could read the first few bytes of the file and
  162. // use a table of prefixes if we need to support other
  163. // systems at some point.
  164. if _, err := os.Stat(name); err != nil {
  165. // For testing, do not require file name to exist.
  166. if strings.Contains(b.addr2line, "testdata/") {
  167. return &fileAddr2Line{file: file{b: b, name: name}}, nil
  168. }
  169. return nil, err
  170. }
  171. if f, err := b.openELF(name, start, limit, offset); err == nil {
  172. return f, nil
  173. }
  174. if f, err := b.openMachO(name, start, limit, offset); err == nil {
  175. return f, nil
  176. }
  177. return nil, fmt.Errorf("unrecognized binary: %s", name)
  178. }
  179. func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  180. of, err := macho.Open(name)
  181. if err != nil {
  182. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  183. }
  184. defer of.Close()
  185. // Subtract the load address of the __TEXT section. Usually 0 for shared
  186. // libraries or 0x100000000 for executables. You can check this value by
  187. // running `objdump -private-headers <file>`.
  188. textSegment := of.Segment("__TEXT")
  189. if textSegment == nil {
  190. return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
  191. }
  192. if textSegment.Addr > start {
  193. return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
  194. name, textSegment.Addr, start)
  195. }
  196. base := start - textSegment.Addr
  197. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  198. return &fileNM{file: file{b: b, name: name, base: base}}, nil
  199. }
  200. return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
  201. }
  202. func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
  203. ef, err := elf.Open(name)
  204. if err != nil {
  205. return nil, fmt.Errorf("error parsing %s: %v", name, err)
  206. }
  207. defer ef.Close()
  208. var stextOffset *uint64
  209. var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
  210. if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
  211. // Reading all Symbols is expensive, and we only rarely need it so
  212. // we don't want to do it every time. But if _stext happens to be
  213. // page-aligned but isn't the same as Vaddr, we would symbolize
  214. // wrong. So if the name the addresses aren't page aligned, or if
  215. // the name is "vmlinux" we read _stext. We can be wrong if: (1)
  216. // someone passes a kernel path that doesn't contain "vmlinux" AND
  217. // (2) _stext is page-aligned AND (3) _stext is not at Vaddr
  218. symbols, err := ef.Symbols()
  219. if err != nil {
  220. return nil, err
  221. }
  222. for _, s := range symbols {
  223. if s.Name == "_stext" {
  224. // The kernel may use _stext as the mapping start address.
  225. stextOffset = &s.Value
  226. break
  227. }
  228. }
  229. }
  230. base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
  231. if err != nil {
  232. return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
  233. }
  234. buildID := ""
  235. if f, err := os.Open(name); err == nil {
  236. if id, err := elfexec.GetBuildID(f); err == nil {
  237. buildID = fmt.Sprintf("%x", id)
  238. }
  239. }
  240. if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
  241. return &fileNM{file: file{b, name, base, buildID}}, nil
  242. }
  243. return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
  244. }
  245. // file implements the binutils.ObjFile interface.
  246. type file struct {
  247. b *binrep
  248. name string
  249. base uint64
  250. buildID string
  251. }
  252. func (f *file) Name() string {
  253. return f.name
  254. }
  255. func (f *file) Base() uint64 {
  256. return f.base
  257. }
  258. func (f *file) BuildID() string {
  259. return f.buildID
  260. }
  261. func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
  262. return []plugin.Frame{}, nil
  263. }
  264. func (f *file) Close() error {
  265. return nil
  266. }
  267. func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
  268. // Get from nm a list of symbols sorted by address.
  269. cmd := exec.Command(f.b.nm, "-n", f.name)
  270. out, err := cmd.Output()
  271. if err != nil {
  272. return nil, fmt.Errorf("%v: %v", cmd.Args, err)
  273. }
  274. return findSymbols(out, f.name, r, addr)
  275. }
  276. // fileNM implements the binutils.ObjFile interface, using 'nm' to map
  277. // addresses to symbols (without file/line number information). It is
  278. // faster than fileAddr2Line.
  279. type fileNM struct {
  280. file
  281. addr2linernm *addr2LinerNM
  282. }
  283. func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
  284. if f.addr2linernm == nil {
  285. addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
  286. if err != nil {
  287. return nil, err
  288. }
  289. f.addr2linernm = addr2liner
  290. }
  291. return f.addr2linernm.addrInfo(addr)
  292. }
  293. // fileAddr2Line implements the binutils.ObjFile interface, using
  294. // llvm-symbolizer, if that's available, or addr2line to map addresses to
  295. // symbols (with file/line number information). It can be slow for large
  296. // binaries with debug information.
  297. type fileAddr2Line struct {
  298. once sync.Once
  299. file
  300. addr2liner *addr2Liner
  301. llvmSymbolizer *llvmSymbolizer
  302. }
  303. func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
  304. f.once.Do(f.init)
  305. if f.llvmSymbolizer != nil {
  306. return f.llvmSymbolizer.addrInfo(addr)
  307. }
  308. if f.addr2liner != nil {
  309. return f.addr2liner.addrInfo(addr)
  310. }
  311. return nil, fmt.Errorf("could not find local addr2liner")
  312. }
  313. func (f *fileAddr2Line) init() {
  314. if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
  315. f.llvmSymbolizer = llvmSymbolizer
  316. return
  317. }
  318. if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
  319. f.addr2liner = addr2liner
  320. // When addr2line encounters some gcc compiled binaries, it
  321. // drops interesting parts of names in anonymous namespaces.
  322. // Fallback to NM for better function names.
  323. if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
  324. f.addr2liner.nm = nm
  325. }
  326. }
  327. }
  328. func (f *fileAddr2Line) Close() error {
  329. if f.llvmSymbolizer != nil {
  330. f.llvmSymbolizer.rw.close()
  331. f.llvmSymbolizer = nil
  332. }
  333. if f.addr2liner != nil {
  334. f.addr2liner.rw.close()
  335. f.addr2liner = nil
  336. }
  337. return nil
  338. }