Sin descripción

legacy_profile.go 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // This file implements parsers to convert legacy profiles into the
  15. // profile.proto format.
  16. package profile
  17. import (
  18. "bufio"
  19. "bytes"
  20. "fmt"
  21. "io"
  22. "math"
  23. "regexp"
  24. "strconv"
  25. "strings"
  26. )
  27. var (
  28. countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\z`)
  29. countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)
  30. heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
  31. heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
  32. contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
  33. hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
  34. growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)
  35. fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)
  36. threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
  37. threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
  38. // Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
  39. // Recommended format:
  40. // Start End object file name offset(optional) linker build id
  41. // 0x40000-0x80000 /path/to/binary (@FF00) abc123456
  42. spaceDigits = `\s+[[:digit:]]+`
  43. hexPair = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
  44. oSpace = `\s*`
  45. // Capturing expressions.
  46. cHex = `(?:0x)?([[:xdigit:]]+)`
  47. cHexRange = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
  48. cSpaceString = `(?:\s+(\S+))?`
  49. cSpaceHex = `(?:\s+([[:xdigit:]]+))?`
  50. cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
  51. cPerm = `(?:\s+([-rwxp]+))?`
  52. procMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
  53. briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)
  54. )
  55. func isSpaceOrComment(line string) bool {
  56. trimmed := strings.TrimSpace(line)
  57. return len(trimmed) == 0 || trimmed[0] == '#'
  58. }
  59. // parseGoCount parses a Go count profile (e.g., threadcreate or
  60. // goroutine) and returns a new Profile.
  61. func parseGoCount(b []byte) (*Profile, error) {
  62. s := bufio.NewScanner(bytes.NewBuffer(b))
  63. // Skip comments at the beginning of the file.
  64. for s.Scan() && isSpaceOrComment(s.Text()) {
  65. }
  66. if err := s.Err(); err != nil {
  67. return nil, err
  68. }
  69. m := countStartRE.FindStringSubmatch(s.Text())
  70. if m == nil {
  71. return nil, errUnrecognized
  72. }
  73. profileType := m[1]
  74. p := &Profile{
  75. PeriodType: &ValueType{Type: profileType, Unit: "count"},
  76. Period: 1,
  77. SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
  78. }
  79. locations := make(map[uint64]*Location)
  80. for s.Scan() {
  81. line := s.Text()
  82. if isSpaceOrComment(line) {
  83. continue
  84. }
  85. if strings.HasPrefix(line, "---") {
  86. break
  87. }
  88. m := countRE.FindStringSubmatch(line)
  89. if m == nil {
  90. return nil, errMalformed
  91. }
  92. n, err := strconv.ParseInt(m[1], 0, 64)
  93. if err != nil {
  94. return nil, errMalformed
  95. }
  96. fields := strings.Fields(m[2])
  97. locs := make([]*Location, 0, len(fields))
  98. for _, stk := range fields {
  99. addr, err := strconv.ParseUint(stk, 0, 64)
  100. if err != nil {
  101. return nil, errMalformed
  102. }
  103. // Adjust all frames by -1 to land on top of the call instruction.
  104. addr--
  105. loc := locations[addr]
  106. if loc == nil {
  107. loc = &Location{
  108. Address: addr,
  109. }
  110. locations[addr] = loc
  111. p.Location = append(p.Location, loc)
  112. }
  113. locs = append(locs, loc)
  114. }
  115. p.Sample = append(p.Sample, &Sample{
  116. Location: locs,
  117. Value: []int64{n},
  118. })
  119. }
  120. if err := s.Err(); err != nil {
  121. return nil, err
  122. }
  123. if err := parseAdditionalSections(s, p); err != nil {
  124. return nil, err
  125. }
  126. return p, nil
  127. }
  128. // remapLocationIDs ensures there is a location for each address
  129. // referenced by a sample, and remaps the samples to point to the new
  130. // location ids.
  131. func (p *Profile) remapLocationIDs() {
  132. seen := make(map[*Location]bool, len(p.Location))
  133. var locs []*Location
  134. for _, s := range p.Sample {
  135. for _, l := range s.Location {
  136. if seen[l] {
  137. continue
  138. }
  139. l.ID = uint64(len(locs) + 1)
  140. locs = append(locs, l)
  141. seen[l] = true
  142. }
  143. }
  144. p.Location = locs
  145. }
  146. func (p *Profile) remapFunctionIDs() {
  147. seen := make(map[*Function]bool, len(p.Function))
  148. var fns []*Function
  149. for _, l := range p.Location {
  150. for _, ln := range l.Line {
  151. fn := ln.Function
  152. if fn == nil || seen[fn] {
  153. continue
  154. }
  155. fn.ID = uint64(len(fns) + 1)
  156. fns = append(fns, fn)
  157. seen[fn] = true
  158. }
  159. }
  160. p.Function = fns
  161. }
  162. // remapMappingIDs matches location addresses with existing mappings
  163. // and updates them appropriately. This is O(N*M), if this ever shows
  164. // up as a bottleneck, evaluate sorting the mappings and doing a
  165. // binary search, which would make it O(N*log(M)).
  166. func (p *Profile) remapMappingIDs() {
  167. // Some profile handlers will incorrectly set regions for the main
  168. // executable if its section is remapped. Fix them through heuristics.
  169. if len(p.Mapping) > 0 {
  170. // Remove the initial mapping if named '/anon_hugepage' and has a
  171. // consecutive adjacent mapping.
  172. if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
  173. if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
  174. p.Mapping = p.Mapping[1:]
  175. }
  176. }
  177. }
  178. // Subtract the offset from the start of the main mapping if it
  179. // ends up at a recognizable start address.
  180. if len(p.Mapping) > 0 {
  181. const expectedStart = 0x400000
  182. if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
  183. m.Start = expectedStart
  184. m.Offset = 0
  185. }
  186. }
  187. // Associate each location with an address to the corresponding
  188. // mapping. Create fake mapping if a suitable one isn't found.
  189. var fake *Mapping
  190. nextLocation:
  191. for _, l := range p.Location {
  192. a := l.Address
  193. if l.Mapping != nil || a == 0 {
  194. continue
  195. }
  196. for _, m := range p.Mapping {
  197. if m.Start <= a && a < m.Limit {
  198. l.Mapping = m
  199. continue nextLocation
  200. }
  201. }
  202. // Work around legacy handlers failing to encode the first
  203. // part of mappings split into adjacent ranges.
  204. for _, m := range p.Mapping {
  205. if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
  206. m.Start -= m.Offset
  207. m.Offset = 0
  208. l.Mapping = m
  209. continue nextLocation
  210. }
  211. }
  212. // If there is still no mapping, create a fake one.
  213. // This is important for the Go legacy handler, which produced
  214. // no mappings.
  215. if fake == nil {
  216. fake = &Mapping{
  217. ID: 1,
  218. Limit: ^uint64(0),
  219. }
  220. p.Mapping = append(p.Mapping, fake)
  221. }
  222. l.Mapping = fake
  223. }
  224. // Reset all mapping IDs.
  225. for i, m := range p.Mapping {
  226. m.ID = uint64(i + 1)
  227. }
  228. }
  229. var cpuInts = []func([]byte) (uint64, []byte){
  230. get32l,
  231. get32b,
  232. get64l,
  233. get64b,
  234. }
  235. func get32l(b []byte) (uint64, []byte) {
  236. if len(b) < 4 {
  237. return 0, nil
  238. }
  239. return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
  240. }
  241. func get32b(b []byte) (uint64, []byte) {
  242. if len(b) < 4 {
  243. return 0, nil
  244. }
  245. return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
  246. }
  247. func get64l(b []byte) (uint64, []byte) {
  248. if len(b) < 8 {
  249. return 0, nil
  250. }
  251. return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
  252. }
  253. func get64b(b []byte) (uint64, []byte) {
  254. if len(b) < 8 {
  255. return 0, nil
  256. }
  257. return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
  258. }
  259. // parseCPU parses a profilez legacy profile and returns a newly
  260. // populated Profile.
  261. //
  262. // The general format for profilez samples is a sequence of words in
  263. // binary format. The first words are a header with the following data:
  264. // 1st word -- 0
  265. // 2nd word -- 3
  266. // 3rd word -- 0 if a c++ application, 1 if a java application.
  267. // 4th word -- Sampling period (in microseconds).
  268. // 5th word -- Padding.
  269. func parseCPU(b []byte) (*Profile, error) {
  270. var parse func([]byte) (uint64, []byte)
  271. var n1, n2, n3, n4, n5 uint64
  272. for _, parse = range cpuInts {
  273. var tmp []byte
  274. n1, tmp = parse(b)
  275. n2, tmp = parse(tmp)
  276. n3, tmp = parse(tmp)
  277. n4, tmp = parse(tmp)
  278. n5, tmp = parse(tmp)
  279. if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
  280. b = tmp
  281. return cpuProfile(b, int64(n4), parse)
  282. }
  283. if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
  284. b = tmp
  285. return javaCPUProfile(b, int64(n4), parse)
  286. }
  287. }
  288. return nil, errUnrecognized
  289. }
  290. // cpuProfile returns a new Profile from C++ profilez data.
  291. // b is the profile bytes after the header, period is the profiling
  292. // period, and parse is a function to parse 8-byte chunks from the
  293. // profile in its native endianness.
  294. func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
  295. p := &Profile{
  296. Period: period * 1000,
  297. PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
  298. SampleType: []*ValueType{
  299. {Type: "samples", Unit: "count"},
  300. {Type: "cpu", Unit: "nanoseconds"},
  301. },
  302. }
  303. var err error
  304. if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
  305. return nil, err
  306. }
  307. // If *most* samples have the same second-to-the-bottom frame, it
  308. // strongly suggests that it is an uninteresting artifact of
  309. // measurement -- a stack frame pushed by the signal handler. The
  310. // bottom frame is always correct as it is picked up from the signal
  311. // structure, not the stack. Check if this is the case and if so,
  312. // remove.
  313. // Remove up to two frames.
  314. maxiter := 2
  315. // Allow one different sample for this many samples with the same
  316. // second-to-last frame.
  317. similarSamples := 32
  318. margin := len(p.Sample) / similarSamples
  319. for iter := 0; iter < maxiter; iter++ {
  320. addr1 := make(map[uint64]int)
  321. for _, s := range p.Sample {
  322. if len(s.Location) > 1 {
  323. a := s.Location[1].Address
  324. addr1[a] = addr1[a] + 1
  325. }
  326. }
  327. for id1, count := range addr1 {
  328. if count >= len(p.Sample)-margin {
  329. // Found uninteresting frame, strip it out from all samples
  330. for _, s := range p.Sample {
  331. if len(s.Location) > 1 && s.Location[1].Address == id1 {
  332. s.Location = append(s.Location[:1], s.Location[2:]...)
  333. }
  334. }
  335. break
  336. }
  337. }
  338. }
  339. if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
  340. return nil, err
  341. }
  342. cleanupDuplicateLocations(p)
  343. return p, nil
  344. }
  345. func cleanupDuplicateLocations(p *Profile) {
  346. // The profile handler may duplicate the leaf frame, because it gets
  347. // its address both from stack unwinding and from the signal
  348. // context. Detect this and delete the duplicate, which has been
  349. // adjusted by -1. The leaf address should not be adjusted as it is
  350. // not a call.
  351. for _, s := range p.Sample {
  352. if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
  353. s.Location = append(s.Location[:1], s.Location[2:]...)
  354. }
  355. }
  356. }
  357. // parseCPUSamples parses a collection of profilez samples from a
  358. // profile.
  359. //
  360. // profilez samples are a repeated sequence of stack frames of the
  361. // form:
  362. // 1st word -- The number of times this stack was encountered.
  363. // 2nd word -- The size of the stack (StackSize).
  364. // 3rd word -- The first address on the stack.
  365. // ...
  366. // StackSize + 2 -- The last address on the stack
  367. // The last stack trace is of the form:
  368. // 1st word -- 0
  369. // 2nd word -- 1
  370. // 3rd word -- 0
  371. //
  372. // Addresses from stack traces may point to the next instruction after
  373. // each call. Optionally adjust by -1 to land somewhere on the actual
  374. // call (except for the leaf, which is not a call).
  375. func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
  376. locs := make(map[uint64]*Location)
  377. for len(b) > 0 {
  378. var count, nstk uint64
  379. count, b = parse(b)
  380. nstk, b = parse(b)
  381. if b == nil || nstk > uint64(len(b)/4) {
  382. return nil, nil, errUnrecognized
  383. }
  384. var sloc []*Location
  385. addrs := make([]uint64, nstk)
  386. for i := 0; i < int(nstk); i++ {
  387. addrs[i], b = parse(b)
  388. }
  389. if count == 0 && nstk == 1 && addrs[0] == 0 {
  390. // End of data marker
  391. break
  392. }
  393. for i, addr := range addrs {
  394. if adjust && i > 0 {
  395. addr--
  396. }
  397. loc := locs[addr]
  398. if loc == nil {
  399. loc = &Location{
  400. Address: addr,
  401. }
  402. locs[addr] = loc
  403. p.Location = append(p.Location, loc)
  404. }
  405. sloc = append(sloc, loc)
  406. }
  407. p.Sample = append(p.Sample,
  408. &Sample{
  409. Value: []int64{int64(count), int64(count) * p.Period},
  410. Location: sloc,
  411. })
  412. }
  413. // Reached the end without finding the EOD marker.
  414. return b, locs, nil
  415. }
  416. // parseHeap parses a heapz legacy or a growthz profile and
  417. // returns a newly populated Profile.
  418. func parseHeap(b []byte) (p *Profile, err error) {
  419. s := bufio.NewScanner(bytes.NewBuffer(b))
  420. if !s.Scan() {
  421. if err := s.Err(); err != nil {
  422. return nil, err
  423. }
  424. return nil, errUnrecognized
  425. }
  426. p = &Profile{}
  427. sampling := ""
  428. hasAlloc := false
  429. line := s.Text()
  430. p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
  431. if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
  432. sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
  433. if err != nil {
  434. return nil, err
  435. }
  436. } else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
  437. p.Period = 1
  438. } else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
  439. p.Period = 1
  440. } else {
  441. return nil, errUnrecognized
  442. }
  443. if hasAlloc {
  444. // Put alloc before inuse so that default pprof selection
  445. // will prefer inuse_space.
  446. p.SampleType = []*ValueType{
  447. {Type: "alloc_objects", Unit: "count"},
  448. {Type: "alloc_space", Unit: "bytes"},
  449. {Type: "inuse_objects", Unit: "count"},
  450. {Type: "inuse_space", Unit: "bytes"},
  451. }
  452. } else {
  453. p.SampleType = []*ValueType{
  454. {Type: "objects", Unit: "count"},
  455. {Type: "space", Unit: "bytes"},
  456. }
  457. }
  458. locs := make(map[uint64]*Location)
  459. for s.Scan() {
  460. line := strings.TrimSpace(s.Text())
  461. if isSpaceOrComment(line) {
  462. continue
  463. }
  464. if isMemoryMapSentinel(line) {
  465. break
  466. }
  467. value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
  468. if err != nil {
  469. return nil, err
  470. }
  471. var sloc []*Location
  472. for _, addr := range addrs {
  473. // Addresses from stack traces point to the next instruction after
  474. // each call. Adjust by -1 to land somewhere on the actual call.
  475. addr--
  476. loc := locs[addr]
  477. if locs[addr] == nil {
  478. loc = &Location{
  479. Address: addr,
  480. }
  481. p.Location = append(p.Location, loc)
  482. locs[addr] = loc
  483. }
  484. sloc = append(sloc, loc)
  485. }
  486. p.Sample = append(p.Sample, &Sample{
  487. Value: value,
  488. Location: sloc,
  489. NumLabel: map[string][]int64{"bytes": {blocksize}},
  490. })
  491. }
  492. if err := s.Err(); err != nil {
  493. return nil, err
  494. }
  495. if err := parseAdditionalSections(s, p); err != nil {
  496. return nil, err
  497. }
  498. return p, nil
  499. }
  500. func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
  501. header := heapHeaderRE.FindStringSubmatch(line)
  502. if header == nil {
  503. return "", 0, false, errUnrecognized
  504. }
  505. if len(header[6]) > 0 {
  506. if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
  507. return "", 0, false, errUnrecognized
  508. }
  509. }
  510. if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
  511. hasAlloc = true
  512. }
  513. switch header[5] {
  514. case "heapz_v2", "heap_v2":
  515. return "v2", period, hasAlloc, nil
  516. case "heapprofile":
  517. return "", 1, hasAlloc, nil
  518. case "heap":
  519. return "v2", period / 2, hasAlloc, nil
  520. default:
  521. return "", 0, false, errUnrecognized
  522. }
  523. }
  524. // parseHeapSample parses a single row from a heap profile into a new Sample.
  525. func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
  526. sampleData := heapSampleRE.FindStringSubmatch(line)
  527. if len(sampleData) != 6 {
  528. return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
  529. }
  530. // This is a local-scoped helper function to avoid needing to pass
  531. // around rate, sampling and many return parameters.
  532. addValues := func(countString, sizeString string, label string) error {
  533. count, err := strconv.ParseInt(countString, 10, 64)
  534. if err != nil {
  535. return fmt.Errorf("malformed sample: %s: %v", line, err)
  536. }
  537. size, err := strconv.ParseInt(sizeString, 10, 64)
  538. if err != nil {
  539. return fmt.Errorf("malformed sample: %s: %v", line, err)
  540. }
  541. if count == 0 && size != 0 {
  542. return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
  543. }
  544. if count != 0 {
  545. blocksize = size / count
  546. if sampling == "v2" {
  547. count, size = scaleHeapSample(count, size, rate)
  548. }
  549. }
  550. value = append(value, count, size)
  551. return nil
  552. }
  553. if includeAlloc {
  554. if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
  555. return nil, 0, nil, err
  556. }
  557. }
  558. if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
  559. return nil, 0, nil, err
  560. }
  561. addrs = parseHexAddresses(sampleData[5])
  562. return value, blocksize, addrs, nil
  563. }
  564. // extractHexAddresses extracts hex numbers from a string and returns
  565. // them, together with their numeric value, in a slice.
  566. func extractHexAddresses(s string) ([]string, []uint64) {
  567. hexStrings := hexNumberRE.FindAllString(s, -1)
  568. var ids []uint64
  569. for _, s := range hexStrings {
  570. if id, err := strconv.ParseUint(s, 0, 64); err == nil {
  571. ids = append(ids, id)
  572. } else {
  573. // Do not expect any parsing failures due to the regexp matching.
  574. panic("failed to parse hex value:" + s)
  575. }
  576. }
  577. return hexStrings, ids
  578. }
  579. // parseHexAddresses parses hex numbers from a string and returns them
  580. // in a slice.
  581. func parseHexAddresses(s string) []uint64 {
  582. _, ids := extractHexAddresses(s)
  583. return ids
  584. }
  585. // scaleHeapSample adjusts the data from a heapz Sample to
  586. // account for its probability of appearing in the collected
  587. // data. heapz profiles are a sampling of the memory allocations
  588. // requests in a program. We estimate the unsampled value by dividing
  589. // each collected sample by its probability of appearing in the
  590. // profile. heapz v2 profiles rely on a poisson process to determine
  591. // which samples to collect, based on the desired average collection
  592. // rate R. The probability of a sample of size S to appear in that
  593. // profile is 1-exp(-S/R).
  594. func scaleHeapSample(count, size, rate int64) (int64, int64) {
  595. if count == 0 || size == 0 {
  596. return 0, 0
  597. }
  598. if rate <= 1 {
  599. // if rate==1 all samples were collected so no adjustment is needed.
  600. // if rate<1 treat as unknown and skip scaling.
  601. return count, size
  602. }
  603. avgSize := float64(size) / float64(count)
  604. scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
  605. return int64(float64(count) * scale), int64(float64(size) * scale)
  606. }
  607. // parseContention parses a contentionz profile and returns a newly
  608. // populated Profile.
  609. func parseContention(b []byte) (p *Profile, err error) {
  610. s := bufio.NewScanner(bytes.NewBuffer(b))
  611. if !s.Scan() {
  612. if err := s.Err(); err != nil {
  613. return nil, err
  614. }
  615. return nil, errUnrecognized
  616. }
  617. line := s.Text()
  618. if !strings.HasPrefix(line, "--- contention") {
  619. return nil, errUnrecognized
  620. }
  621. p = &Profile{
  622. PeriodType: &ValueType{Type: "contentions", Unit: "count"},
  623. Period: 1,
  624. SampleType: []*ValueType{
  625. {Type: "contentions", Unit: "count"},
  626. {Type: "delay", Unit: "nanoseconds"},
  627. },
  628. }
  629. var cpuHz int64
  630. // Parse text of the form "attribute = value" before the samples.
  631. const delimiter = "="
  632. for s.Scan() {
  633. line := s.Text()
  634. if line = strings.TrimSpace(line); line == "" {
  635. continue
  636. }
  637. if strings.HasPrefix(line, "---") {
  638. break
  639. }
  640. attr := strings.SplitN(line, delimiter, 2)
  641. if len(attr) != 2 {
  642. break
  643. }
  644. key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
  645. var err error
  646. switch key {
  647. case "cycles/second":
  648. if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
  649. return nil, errUnrecognized
  650. }
  651. case "sampling period":
  652. if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
  653. return nil, errUnrecognized
  654. }
  655. case "ms since reset":
  656. ms, err := strconv.ParseInt(val, 0, 64)
  657. if err != nil {
  658. return nil, errUnrecognized
  659. }
  660. p.DurationNanos = ms * 1000 * 1000
  661. case "format":
  662. // CPP contentionz profiles don't have format.
  663. return nil, errUnrecognized
  664. case "resolution":
  665. // CPP contentionz profiles don't have resolution.
  666. return nil, errUnrecognized
  667. case "discarded samples":
  668. default:
  669. return nil, errUnrecognized
  670. }
  671. }
  672. if err := s.Err(); err != nil {
  673. return nil, err
  674. }
  675. locs := make(map[uint64]*Location)
  676. for {
  677. line := strings.TrimSpace(s.Text())
  678. if strings.HasPrefix(line, "---") {
  679. break
  680. }
  681. value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
  682. if err != nil {
  683. return nil, err
  684. }
  685. var sloc []*Location
  686. for _, addr := range addrs {
  687. // Addresses from stack traces point to the next instruction after
  688. // each call. Adjust by -1 to land somewhere on the actual call.
  689. addr--
  690. loc := locs[addr]
  691. if locs[addr] == nil {
  692. loc = &Location{
  693. Address: addr,
  694. }
  695. p.Location = append(p.Location, loc)
  696. locs[addr] = loc
  697. }
  698. sloc = append(sloc, loc)
  699. }
  700. p.Sample = append(p.Sample, &Sample{
  701. Value: value,
  702. Location: sloc,
  703. })
  704. if !s.Scan() {
  705. break
  706. }
  707. }
  708. if err := s.Err(); err != nil {
  709. return nil, err
  710. }
  711. if err = parseAdditionalSections(s, p); err != nil {
  712. return nil, err
  713. }
  714. return p, nil
  715. }
  716. // parseContentionSample parses a single row from a contention profile
  717. // into a new Sample.
  718. func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
  719. sampleData := contentionSampleRE.FindStringSubmatch(line)
  720. if sampleData == nil {
  721. return value, addrs, errUnrecognized
  722. }
  723. v1, err := strconv.ParseInt(sampleData[1], 10, 64)
  724. if err != nil {
  725. return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
  726. }
  727. v2, err := strconv.ParseInt(sampleData[2], 10, 64)
  728. if err != nil {
  729. return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
  730. }
  731. // Unsample values if period and cpuHz are available.
  732. // - Delays are scaled to cycles and then to nanoseconds.
  733. // - Contentions are scaled to cycles.
  734. if period > 0 {
  735. if cpuHz > 0 {
  736. cpuGHz := float64(cpuHz) / 1e9
  737. v1 = int64(float64(v1) * float64(period) / cpuGHz)
  738. }
  739. v2 = v2 * period
  740. }
  741. value = []int64{v2, v1}
  742. addrs = parseHexAddresses(sampleData[3])
  743. return value, addrs, nil
  744. }
  745. // parseThread parses a Threadz profile and returns a new Profile.
  746. func parseThread(b []byte) (*Profile, error) {
  747. s := bufio.NewScanner(bytes.NewBuffer(b))
  748. // Skip past comments and empty lines seeking a real header.
  749. for s.Scan() && isSpaceOrComment(s.Text()) {
  750. }
  751. line := s.Text()
  752. if m := threadzStartRE.FindStringSubmatch(line); m != nil {
  753. // Advance over initial comments until first stack trace.
  754. for s.Scan() {
  755. if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
  756. break
  757. }
  758. }
  759. } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
  760. return nil, errUnrecognized
  761. }
  762. p := &Profile{
  763. SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
  764. PeriodType: &ValueType{Type: "thread", Unit: "count"},
  765. Period: 1,
  766. }
  767. locs := make(map[uint64]*Location)
  768. // Recognize each thread and populate profile samples.
  769. for !isMemoryMapSentinel(line) {
  770. if strings.HasPrefix(line, "---- no stack trace for") {
  771. line = ""
  772. break
  773. }
  774. if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
  775. return nil, errUnrecognized
  776. }
  777. var addrs []uint64
  778. var err error
  779. line, addrs, err = parseThreadSample(s)
  780. if err != nil {
  781. return nil, errUnrecognized
  782. }
  783. if len(addrs) == 0 {
  784. // We got a --same as previous threads--. Bump counters.
  785. if len(p.Sample) > 0 {
  786. s := p.Sample[len(p.Sample)-1]
  787. s.Value[0]++
  788. }
  789. continue
  790. }
  791. var sloc []*Location
  792. for i, addr := range addrs {
  793. // Addresses from stack traces point to the next instruction after
  794. // each call. Adjust by -1 to land somewhere on the actual call
  795. // (except for the leaf, which is not a call).
  796. if i > 0 {
  797. addr--
  798. }
  799. loc := locs[addr]
  800. if locs[addr] == nil {
  801. loc = &Location{
  802. Address: addr,
  803. }
  804. p.Location = append(p.Location, loc)
  805. locs[addr] = loc
  806. }
  807. sloc = append(sloc, loc)
  808. }
  809. p.Sample = append(p.Sample, &Sample{
  810. Value: []int64{1},
  811. Location: sloc,
  812. })
  813. }
  814. if err := parseAdditionalSections(s, p); err != nil {
  815. return nil, err
  816. }
  817. cleanupDuplicateLocations(p)
  818. return p, nil
  819. }
  820. // parseThreadSample parses a symbolized or unsymbolized stack trace.
  821. // Returns the first line after the traceback, the sample (or nil if
  822. // it hits a 'same-as-previous' marker) and an error.
  823. func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
  824. var line string
  825. sameAsPrevious := false
  826. for s.Scan() {
  827. line = strings.TrimSpace(s.Text())
  828. if line == "" {
  829. continue
  830. }
  831. if strings.HasPrefix(line, "---") {
  832. break
  833. }
  834. if strings.Contains(line, "same as previous thread") {
  835. sameAsPrevious = true
  836. continue
  837. }
  838. addrs = append(addrs, parseHexAddresses(line)...)
  839. }
  840. if err := s.Err(); err != nil {
  841. return "", nil, err
  842. }
  843. if sameAsPrevious {
  844. return line, nil, nil
  845. }
  846. return line, addrs, nil
  847. }
  848. // parseAdditionalSections parses any additional sections in the
  849. // profile, ignoring any unrecognized sections.
  850. func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
  851. for !isMemoryMapSentinel(s.Text()) && s.Scan() {
  852. }
  853. if err := s.Err(); err != nil {
  854. return err
  855. }
  856. return p.ParseMemoryMapFromScanner(s)
  857. }
  858. // ParseProcMaps parses a memory map in the format of /proc/self/maps.
  859. // ParseMemoryMap should be called after setting on a profile to
  860. // associate locations to the corresponding mapping based on their
  861. // address.
  862. func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
  863. s := bufio.NewScanner(rd)
  864. return parseProcMapsFromScanner(s)
  865. }
  866. func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
  867. var mapping []*Mapping
  868. // If the memory-map sentinel is at column X, assume memory mappings
  869. // also start at X. This is useful to eliminate logging information.
  870. offset := memoryMapSentinelOffset(s.Text())
  871. var attrs []string
  872. var r *strings.Replacer
  873. const delimiter = "="
  874. for s.Scan() {
  875. line := s.Text()
  876. if len(line) > offset {
  877. line = line[offset:]
  878. }
  879. if line = strings.TrimSpace(line); line == "" {
  880. continue
  881. }
  882. if r != nil {
  883. line = r.Replace(line)
  884. }
  885. m, err := parseMappingEntry(line)
  886. if err != nil {
  887. if err == errUnrecognized {
  888. // Recognize assignments of the form: attr=value, and replace
  889. // $attr with value on subsequent mappings.
  890. if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
  891. attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
  892. r = strings.NewReplacer(attrs...)
  893. }
  894. // Ignore any unrecognized entries
  895. continue
  896. }
  897. return nil, err
  898. }
  899. if m == nil {
  900. continue
  901. }
  902. mapping = append(mapping, m)
  903. }
  904. if err := s.Err(); err != nil {
  905. return nil, err
  906. }
  907. return mapping, nil
  908. }
  909. // ParseMemoryMap parses a memory map in the format of
  910. // /proc/self/maps, and overrides the mappings in the current profile.
  911. // It renumbers the samples and locations in the profile correspondingly.
  912. func (p *Profile) ParseMemoryMap(rd io.Reader) error {
  913. return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
  914. }
  915. // ParseMemoryMapFromScanner parses a memory map in the format of
  916. // /proc/self/maps or a variety of legacy format, and overrides the
  917. // mappings in the current profile. It renumbers the samples and
  918. // locations in the profile correspondingly.
  919. func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
  920. mapping, err := parseProcMapsFromScanner(s)
  921. if err != nil {
  922. return err
  923. }
  924. p.Mapping = append(p.Mapping, mapping...)
  925. p.massageMappings()
  926. p.remapLocationIDs()
  927. p.remapFunctionIDs()
  928. p.remapMappingIDs()
  929. return nil
  930. }
  931. func parseMappingEntry(l string) (*Mapping, error) {
  932. var start, end, perm, file, offset, buildID string
  933. if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
  934. start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
  935. } else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
  936. start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
  937. } else {
  938. return nil, errUnrecognized
  939. }
  940. var err error
  941. mapping := &Mapping{
  942. File: file,
  943. BuildID: buildID,
  944. }
  945. if perm != "" && !strings.Contains(perm, "x") {
  946. // Skip non-executable entries.
  947. return nil, nil
  948. }
  949. if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
  950. return nil, errUnrecognized
  951. }
  952. if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
  953. return nil, errUnrecognized
  954. }
  955. if offset != "" {
  956. if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
  957. return nil, errUnrecognized
  958. }
  959. }
  960. return mapping, nil
  961. }
  // memoryMapSentinels lists the marker strings that introduce the memory-map
  // section of a legacy profile.
  var memoryMapSentinels = []string{"--- Memory map: ---", "MAPPED_LIBRARIES:"}
  966. // isMemoryMapSentinel returns true if the string contains one of the
  967. // known sentinels for memory map information.
  968. func isMemoryMapSentinel(line string) bool {
  969. for _, s := range memoryMapSentinels {
  970. if strings.Contains(line, s) {
  971. return true
  972. }
  973. }
  974. return false
  975. }
  976. // memoryMapSentinelOffset returns the index of a known memory map
  977. // sentinel in the string. If the string does not contain a sentinel,
  978. // it returns 0.
  979. func memoryMapSentinelOffset(line string) int {
  980. for _, s := range memoryMapSentinels {
  981. if i := strings.Index(line, s); i != -1 {
  982. return i
  983. }
  984. }
  985. return 0
  986. }
  987. func (p *Profile) addLegacyFrameInfo() {
  988. switch {
  989. case isProfileType(p, heapzSampleTypes):
  990. p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  991. case isProfileType(p, contentionzSampleTypes):
  992. p.DropFrames, p.KeepFrames = lockRxStr, ""
  993. default:
  994. p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  995. }
  996. }
  // Sample-type signatures used with isProfileType to classify a legacy
  // profile. Each inner slice is one accepted sequence of sample type names.
  var (
  	// heapzSampleTypes identifies heap profiles.
  	heapzSampleTypes = [][]string{
  		{"allocations", "size"}, // early Go pprof profiles
  		{"objects", "space"},
  		{"inuse_objects", "inuse_space"},
  		{"alloc_objects", "alloc_space"},
  	}

  	// contentionzSampleTypes identifies contention profiles.
  	contentionzSampleTypes = [][]string{
  		{"contentions", "delay"},
  	}
  )
  1006. func isProfileType(p *Profile, types [][]string) bool {
  1007. st := p.SampleType
  1008. nextType:
  1009. for _, t := range types {
  1010. if len(st) != len(t) {
  1011. continue
  1012. }
  1013. for i := range st {
  1014. if st[i].Type != t[i] {
  1015. continue nextType
  1016. }
  1017. }
  1018. return true
  1019. }
  1020. return false
  1021. }
  // allocRxStr is an alternation of patterns naming memory-allocation
  // routines. addLegacyFrameInfo installs it as DropFrames for heap profiles.
  var allocRxStr = strings.Join([]string{
  	// POSIX entry points.
  	`calloc`, `cfree`, `malloc`, `free`,
  	`memalign`, `do_memalign`, `(__)?posix_memalign`,
  	`pvalloc`, `valloc`, `realloc`,

  	// TC malloc.
  	`tcmalloc::.*`,
  	`tc_calloc`, `tc_cfree`, `tc_malloc`, `tc_free`,
  	`tc_memalign`, `tc_posix_memalign`,
  	`tc_pvalloc`, `tc_valloc`, `tc_realloc`,
  	`tc_new`, `tc_delete`, `tc_newarray`, `tc_deletearray`,
  	`tc_new_nothrow`, `tc_newarray_nothrow`,

  	// Memory-allocation routines on OS X.
  	`malloc_zone_malloc`, `malloc_zone_calloc`, `malloc_zone_valloc`,
  	`malloc_zone_realloc`, `malloc_zone_memalign`, `malloc_zone_free`,

  	// Go runtime.
  	`runtime\..*`,

  	// Other miscellaneous memory-allocation routines.
  	`BaseArena::.*`,
  	`(::)?do_malloc_no_errno`, `(::)?do_malloc_pages`, `(::)?do_malloc`,
  	`DoSampledAllocation`,
  	`MallocedMemBlock::MallocedMemBlock`,
  	`_M_allocate`,
  	`__builtin_(vec_)?delete`, `__builtin_(vec_)?new`,
  	`__gnu_cxx::new_allocator::allocate`,
  	`__libc_malloc`,
  	`__malloc_alloc_template::allocate`,
  	`allocate`,
  	`cpp_alloc`,
  	`operator new(\[\])?`,
  	`simple_alloc::allocate`,
  }, "|")
  // allocSkipRxStr is an alternation of Go runtime frames to preserve when
  // they appear in the middle or at the bottom of the stack.
  // addLegacyFrameInfo installs it as KeepFrames for heap profiles.
  var allocSkipRxStr = `runtime\.panic` +
  	`|runtime\.reflectcall` +
  	`|runtime\.call[0-9]*`
  // cpuProfilerRxStr is an alternation of profiler and signal-handler frame
  // names. addLegacyFrameInfo installs it as DropFrames for any profile that
  // is neither a heap nor a contention profile.
  var cpuProfilerRxStr = `ProfileData::Add` +
  	`|ProfileData::prof_handler` +
  	`|CpuProfiler::prof_handler` +
  	`|__pthread_sighandler` +
  	`|__restore`
  // lockRxStr is an alternation of patterns naming lock-profiling and
  // unlock routines. addLegacyFrameInfo installs it as DropFrames for
  // contention profiles.
  var lockRxStr = strings.Join([]string{
  	// Profile-data recording and submission.
  	`RecordLockProfileData`,
  	`(base::)?RecordLockProfileData.*`,
  	`(base::)?SubmitMutexProfileData.*`,
  	`(base::)?SubmitSpinLockProfileData.*`,

  	// Mutex routines, optionally qualified with base::.
  	`(base::Mutex::)?AwaitCommon.*`, `(base::Mutex::)?Unlock.*`,
  	`(base::Mutex::)?UnlockSlow.*`, `(base::Mutex::)?ReaderUnlock.*`,
  	`(base::MutexLock::)?~MutexLock.*`,

  	// Mutex routines without the base:: qualifier.
  	`(Mutex::)?AwaitCommon.*`, `(Mutex::)?Unlock.*`,
  	`(Mutex::)?UnlockSlow.*`, `(Mutex::)?ReaderUnlock.*`,
  	`(MutexLock::)?~MutexLock.*`,

  	// Spin locks.
  	`(SpinLock::)?Unlock.*`, `(SpinLock::)?SlowUnlock.*`,
  	`(SpinLockHolder::)?~SpinLockHolder.*`,
  }, "|")