Ingen beskrivning

legacy_profile.go 31KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // This file implements parsers to convert legacy profiles into the
  15. // profile.proto format.
  16. package profile
  17. import (
  18. "bufio"
  19. "bytes"
  20. "fmt"
  21. "io"
  22. "math"
  23. "regexp"
  24. "strconv"
  25. "strings"
  26. )
// Regular expressions used to recognize and pick apart the text-based
// legacy profile formats handled in this file.
var (
	// countStartRE matches the header line of a Go count profile,
	// "<type> profile: total <n>", capturing the profile type.
	countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\z`)
	// countRE matches a Go count sample, "<count> @ 0x... 0x...",
	// capturing the count and the space-separated address list.
	countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)
	// heapHeaderRE matches a heap profile header. It captures the four
	// counters, the profile kind (a word starting with "heap") and an
	// optional trailing number that parseHeapHeader uses as the period.
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	// heapSampleRE matches a heap sample line. parseHeapSample treats
	// groups 1-2 as the in-use count/bytes, groups 3-4 as the allocation
	// count/bytes and group 5 as the stack addresses.
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
	// contentionSampleRE matches a contention sample line: two decimal
	// numbers followed by "@" and the stack addresses.
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
	// hexNumberRE matches a lowercase hexadecimal literal with a "0x"
	// prefix. It places no limit on the number of digits.
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
	// growthHeaderRE matches the header line of a growthz profile.
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)
	// fragmentationHeaderRE matches the header line of a fragmentationz
	// profile.
	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)
	// threadzStartRE matches the "--- threadz <n> ---" banner that may
	// open a threadz profile.
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	// threadStartRE matches the line that opens one thread's stack,
	// capturing a hex thread identifier, a name and a trailing number.
	threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
	// procMapsRE matches a memory map line: address range, permission
	// flags, offset, device, inode and an optional file name.
	// NOTE(review): presumably the /proc/self/maps layout; it is not
	// used in the visible part of the file, confirm against its caller.
	procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)
	// briefMapsRE matches an abbreviated mapping line of the form
	// "<start>-<limit>: <file> ...", with an optional trailing hex number.
	// NOTE(review): not used in the visible part of the file; confirm
	// the meaning of the trailing group against its caller.
	briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)
)
  41. func isSpaceOrComment(line string) bool {
  42. trimmed := strings.TrimSpace(line)
  43. return len(trimmed) == 0 || trimmed[0] == '#'
  44. }
  45. // parseGoCount parses a Go count profile (e.g., threadcreate or
  46. // goroutine) and returns a new Profile.
  47. func parseGoCount(b []byte) (*Profile, error) {
  48. s := bufio.NewScanner(bytes.NewBuffer(b))
  49. // Skip comments at the beginning of the file.
  50. for s.Scan() && isSpaceOrComment(s.Text()) {
  51. }
  52. if err := s.Err(); err != nil {
  53. return nil, err
  54. }
  55. m := countStartRE.FindStringSubmatch(s.Text())
  56. if m == nil {
  57. return nil, errUnrecognized
  58. }
  59. profileType := m[1]
  60. p := &Profile{
  61. PeriodType: &ValueType{Type: profileType, Unit: "count"},
  62. Period: 1,
  63. SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
  64. }
  65. locations := make(map[uint64]*Location)
  66. for s.Scan() {
  67. line := s.Text()
  68. if isSpaceOrComment(line) {
  69. continue
  70. }
  71. if strings.HasPrefix(line, "---") {
  72. break
  73. }
  74. m := countRE.FindStringSubmatch(line)
  75. if m == nil {
  76. return nil, errMalformed
  77. }
  78. n, err := strconv.ParseInt(m[1], 0, 64)
  79. if err != nil {
  80. return nil, errMalformed
  81. }
  82. fields := strings.Fields(m[2])
  83. locs := make([]*Location, 0, len(fields))
  84. for _, stk := range fields {
  85. addr, err := strconv.ParseUint(stk, 0, 64)
  86. if err != nil {
  87. return nil, errMalformed
  88. }
  89. // Adjust all frames by -1 to land on top of the call instruction.
  90. addr--
  91. loc := locations[addr]
  92. if loc == nil {
  93. loc = &Location{
  94. Address: addr,
  95. }
  96. locations[addr] = loc
  97. p.Location = append(p.Location, loc)
  98. }
  99. locs = append(locs, loc)
  100. }
  101. p.Sample = append(p.Sample, &Sample{
  102. Location: locs,
  103. Value: []int64{n},
  104. })
  105. }
  106. if err := s.Err(); err != nil {
  107. return nil, err
  108. }
  109. if err := parseAdditionalSections(s, p); err != nil {
  110. return nil, err
  111. }
  112. return p, nil
  113. }
  114. // remapLocationIDs ensures there is a location for each address
  115. // referenced by a sample, and remaps the samples to point to the new
  116. // location ids.
  117. func (p *Profile) remapLocationIDs() {
  118. seen := make(map[*Location]bool, len(p.Location))
  119. var locs []*Location
  120. for _, s := range p.Sample {
  121. for _, l := range s.Location {
  122. if seen[l] {
  123. continue
  124. }
  125. l.ID = uint64(len(locs) + 1)
  126. locs = append(locs, l)
  127. seen[l] = true
  128. }
  129. }
  130. p.Location = locs
  131. }
  132. func (p *Profile) remapFunctionIDs() {
  133. seen := make(map[*Function]bool, len(p.Function))
  134. var fns []*Function
  135. for _, l := range p.Location {
  136. for _, ln := range l.Line {
  137. fn := ln.Function
  138. if fn == nil || seen[fn] {
  139. continue
  140. }
  141. fn.ID = uint64(len(fns) + 1)
  142. fns = append(fns, fn)
  143. seen[fn] = true
  144. }
  145. }
  146. p.Function = fns
  147. }
  148. // remapMappingIDs matches location addresses with existing mappings
  149. // and updates them appropriately. This is O(N*M), if this ever shows
  150. // up as a bottleneck, evaluate sorting the mappings and doing a
  151. // binary search, which would make it O(N*log(M)).
  152. func (p *Profile) remapMappingIDs() {
  153. // Some profile handlers will incorrectly set regions for the main
  154. // executable if its section is remapped. Fix them through heuristics.
  155. if len(p.Mapping) > 0 {
  156. // Remove the initial mapping if named '/anon_hugepage' and has a
  157. // consecutive adjacent mapping.
  158. if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
  159. if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
  160. p.Mapping = p.Mapping[1:]
  161. }
  162. }
  163. }
  164. // Subtract the offset from the start of the main mapping if it
  165. // ends up at a recognizable start address.
  166. if len(p.Mapping) > 0 {
  167. const expectedStart = 0x400000
  168. if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
  169. m.Start = expectedStart
  170. m.Offset = 0
  171. }
  172. }
  173. // Associate each location with an address to the corresponding
  174. // mapping. Create fake mapping if a suitable one isn't found.
  175. var fake *Mapping
  176. nextLocation:
  177. for _, l := range p.Location {
  178. a := l.Address
  179. if l.Mapping != nil || a == 0 {
  180. continue
  181. }
  182. for _, m := range p.Mapping {
  183. if m.Start <= a && a < m.Limit {
  184. l.Mapping = m
  185. continue nextLocation
  186. }
  187. }
  188. // Work around legacy handlers failing to encode the first
  189. // part of mappings split into adjacent ranges.
  190. for _, m := range p.Mapping {
  191. if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
  192. m.Start -= m.Offset
  193. m.Offset = 0
  194. l.Mapping = m
  195. continue nextLocation
  196. }
  197. }
  198. // If there is still no mapping, create a fake one.
  199. // This is important for the Go legacy handler, which produced
  200. // no mappings.
  201. if fake == nil {
  202. fake = &Mapping{
  203. ID: 1,
  204. Limit: ^uint64(0),
  205. }
  206. p.Mapping = append(p.Mapping, fake)
  207. }
  208. l.Mapping = fake
  209. }
  210. // Reset all mapping IDs.
  211. for i, m := range p.Mapping {
  212. m.ID = uint64(i + 1)
  213. }
  214. }
// cpuInts lists the word decoders that parseCPU tries, in order, when
// looking for a valid profile header: 32-bit little-endian, 32-bit
// big-endian, 64-bit little-endian and 64-bit big-endian. Each decoder
// returns the decoded word and the remaining bytes, or (0, nil) when
// the input is too short.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}
  221. func get32l(b []byte) (uint64, []byte) {
  222. if len(b) < 4 {
  223. return 0, nil
  224. }
  225. return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
  226. }
  227. func get32b(b []byte) (uint64, []byte) {
  228. if len(b) < 4 {
  229. return 0, nil
  230. }
  231. return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
  232. }
  233. func get64l(b []byte) (uint64, []byte) {
  234. if len(b) < 8 {
  235. return 0, nil
  236. }
  237. return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
  238. }
  239. func get64b(b []byte) (uint64, []byte) {
  240. if len(b) < 8 {
  241. return 0, nil
  242. }
  243. return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
  244. }
  245. // parseCPU parses a profilez legacy profile and returns a newly
  246. // populated Profile.
  247. //
  248. // The general format for profilez samples is a sequence of words in
  249. // binary format. The first words are a header with the following data:
  250. // 1st word -- 0
  251. // 2nd word -- 3
  252. // 3rd word -- 0 if a c++ application, 1 if a java application.
  253. // 4th word -- Sampling period (in microseconds).
  254. // 5th word -- Padding.
  255. func parseCPU(b []byte) (*Profile, error) {
  256. var parse func([]byte) (uint64, []byte)
  257. var n1, n2, n3, n4, n5 uint64
  258. for _, parse = range cpuInts {
  259. var tmp []byte
  260. n1, tmp = parse(b)
  261. n2, tmp = parse(tmp)
  262. n3, tmp = parse(tmp)
  263. n4, tmp = parse(tmp)
  264. n5, tmp = parse(tmp)
  265. if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
  266. b = tmp
  267. return cpuProfile(b, int64(n4), parse)
  268. }
  269. if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
  270. b = tmp
  271. return javaCPUProfile(b, int64(n4), parse)
  272. }
  273. }
  274. return nil, errUnrecognized
  275. }
  276. // cpuProfile returns a new Profile from C++ profilez data.
  277. // b is the profile bytes after the header, period is the profiling
  278. // period, and parse is a function to parse 8-byte chunks from the
  279. // profile in its native endianness.
  280. func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
  281. p := &Profile{
  282. Period: period * 1000,
  283. PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
  284. SampleType: []*ValueType{
  285. {Type: "samples", Unit: "count"},
  286. {Type: "cpu", Unit: "nanoseconds"},
  287. },
  288. }
  289. var err error
  290. if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
  291. return nil, err
  292. }
  293. // If *most* samples have the same second-to-the-bottom frame, it
  294. // strongly suggests that it is an uninteresting artifact of
  295. // measurement -- a stack frame pushed by the signal handler. The
  296. // bottom frame is always correct as it is picked up from the signal
  297. // structure, not the stack. Check if this is the case and if so,
  298. // remove.
  299. // Remove up to two frames.
  300. maxiter := 2
  301. // Allow one different sample for this many samples with the same
  302. // second-to-last frame.
  303. similarSamples := 32
  304. margin := len(p.Sample) / similarSamples
  305. for iter := 0; iter < maxiter; iter++ {
  306. addr1 := make(map[uint64]int)
  307. for _, s := range p.Sample {
  308. if len(s.Location) > 1 {
  309. a := s.Location[1].Address
  310. addr1[a] = addr1[a] + 1
  311. }
  312. }
  313. for id1, count := range addr1 {
  314. if count >= len(p.Sample)-margin {
  315. // Found uninteresting frame, strip it out from all samples
  316. for _, s := range p.Sample {
  317. if len(s.Location) > 1 && s.Location[1].Address == id1 {
  318. s.Location = append(s.Location[:1], s.Location[2:]...)
  319. }
  320. }
  321. break
  322. }
  323. }
  324. }
  325. if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
  326. return nil, err
  327. }
  328. cleanupDuplicateLocations(p)
  329. return p, nil
  330. }
  331. func cleanupDuplicateLocations(p *Profile) {
  332. // The profile handler may duplicate the leaf frame, because it gets
  333. // its address both from stack unwinding and from the signal
  334. // context. Detect this and delete the duplicate, which has been
  335. // adjusted by -1. The leaf address should not be adjusted as it is
  336. // not a call.
  337. for _, s := range p.Sample {
  338. if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
  339. s.Location = append(s.Location[:1], s.Location[2:]...)
  340. }
  341. }
  342. }
  343. // parseCPUSamples parses a collection of profilez samples from a
  344. // profile.
  345. //
  346. // profilez samples are a repeated sequence of stack frames of the
  347. // form:
  348. // 1st word -- The number of times this stack was encountered.
  349. // 2nd word -- The size of the stack (StackSize).
  350. // 3rd word -- The first address on the stack.
  351. // ...
  352. // StackSize + 2 -- The last address on the stack
  353. // The last stack trace is of the form:
  354. // 1st word -- 0
  355. // 2nd word -- 1
  356. // 3rd word -- 0
  357. //
  358. // Addresses from stack traces may point to the next instruction after
  359. // each call. Optionally adjust by -1 to land somewhere on the actual
  360. // call (except for the leaf, which is not a call).
  361. func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
  362. locs := make(map[uint64]*Location)
  363. for len(b) > 0 {
  364. var count, nstk uint64
  365. count, b = parse(b)
  366. nstk, b = parse(b)
  367. if b == nil || nstk > uint64(len(b)/4) {
  368. return nil, nil, errUnrecognized
  369. }
  370. var sloc []*Location
  371. addrs := make([]uint64, nstk)
  372. for i := 0; i < int(nstk); i++ {
  373. addrs[i], b = parse(b)
  374. }
  375. if count == 0 && nstk == 1 && addrs[0] == 0 {
  376. // End of data marker
  377. break
  378. }
  379. for i, addr := range addrs {
  380. if adjust && i > 0 {
  381. addr--
  382. }
  383. loc := locs[addr]
  384. if loc == nil {
  385. loc = &Location{
  386. Address: addr,
  387. }
  388. locs[addr] = loc
  389. p.Location = append(p.Location, loc)
  390. }
  391. sloc = append(sloc, loc)
  392. }
  393. p.Sample = append(p.Sample,
  394. &Sample{
  395. Value: []int64{int64(count), int64(count) * p.Period},
  396. Location: sloc,
  397. })
  398. }
  399. // Reached the end without finding the EOD marker.
  400. return b, locs, nil
  401. }
  402. // parseHeap parses a heapz legacy or a growthz profile and
  403. // returns a newly populated Profile.
  404. func parseHeap(b []byte) (p *Profile, err error) {
  405. s := bufio.NewScanner(bytes.NewBuffer(b))
  406. if !s.Scan() {
  407. if err := s.Err(); err != nil {
  408. return nil, err
  409. }
  410. return nil, errUnrecognized
  411. }
  412. p = &Profile{}
  413. sampling := ""
  414. hasAlloc := false
  415. line := s.Text()
  416. p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
  417. if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
  418. sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
  419. if err != nil {
  420. return nil, err
  421. }
  422. } else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
  423. p.Period = 1
  424. } else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
  425. p.Period = 1
  426. } else {
  427. return nil, errUnrecognized
  428. }
  429. if hasAlloc {
  430. // Put alloc before inuse so that default pprof selection
  431. // will prefer inuse_space.
  432. p.SampleType = []*ValueType{
  433. {Type: "alloc_objects", Unit: "count"},
  434. {Type: "alloc_space", Unit: "bytes"},
  435. {Type: "inuse_objects", Unit: "count"},
  436. {Type: "inuse_space", Unit: "bytes"},
  437. }
  438. } else {
  439. p.SampleType = []*ValueType{
  440. {Type: "objects", Unit: "count"},
  441. {Type: "space", Unit: "bytes"},
  442. }
  443. }
  444. locs := make(map[uint64]*Location)
  445. for s.Scan() {
  446. line := strings.TrimSpace(s.Text())
  447. if isSpaceOrComment(line) {
  448. continue
  449. }
  450. if sectionTrigger(line) != unrecognizedSection {
  451. break
  452. }
  453. value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
  454. if err != nil {
  455. return nil, err
  456. }
  457. var sloc []*Location
  458. for _, addr := range addrs {
  459. // Addresses from stack traces point to the next instruction after
  460. // each call. Adjust by -1 to land somewhere on the actual call.
  461. addr--
  462. loc := locs[addr]
  463. if locs[addr] == nil {
  464. loc = &Location{
  465. Address: addr,
  466. }
  467. p.Location = append(p.Location, loc)
  468. locs[addr] = loc
  469. }
  470. sloc = append(sloc, loc)
  471. }
  472. p.Sample = append(p.Sample, &Sample{
  473. Value: value,
  474. Location: sloc,
  475. NumLabel: map[string][]int64{"bytes": {blocksize}},
  476. })
  477. }
  478. if err := s.Err(); err != nil {
  479. return nil, err
  480. }
  481. if err := parseAdditionalSections(s, p); err != nil {
  482. return nil, err
  483. }
  484. return p, nil
  485. }
  486. func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
  487. header := heapHeaderRE.FindStringSubmatch(line)
  488. if header == nil {
  489. return "", 0, false, errUnrecognized
  490. }
  491. if len(header[6]) > 0 {
  492. if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
  493. return "", 0, false, errUnrecognized
  494. }
  495. }
  496. if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
  497. hasAlloc = true
  498. }
  499. switch header[5] {
  500. case "heapz_v2", "heap_v2":
  501. return "v2", period, hasAlloc, nil
  502. case "heapprofile":
  503. return "", 1, hasAlloc, nil
  504. case "heap":
  505. return "v2", period / 2, hasAlloc, nil
  506. default:
  507. return "", 0, false, errUnrecognized
  508. }
  509. }
  510. // parseHeapSample parses a single row from a heap profile into a new Sample.
  511. func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
  512. sampleData := heapSampleRE.FindStringSubmatch(line)
  513. if len(sampleData) != 6 {
  514. return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
  515. }
  516. // This is a local-scoped helper function to avoid needing to pass
  517. // around rate, sampling and many return parameters.
  518. addValues := func(countString, sizeString string, label string) error {
  519. count, err := strconv.ParseInt(countString, 10, 64)
  520. if err != nil {
  521. return fmt.Errorf("malformed sample: %s: %v", line, err)
  522. }
  523. size, err := strconv.ParseInt(sizeString, 10, 64)
  524. if err != nil {
  525. return fmt.Errorf("malformed sample: %s: %v", line, err)
  526. }
  527. if count == 0 && size != 0 {
  528. return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
  529. }
  530. if count != 0 {
  531. blocksize = size / count
  532. if sampling == "v2" {
  533. count, size = scaleHeapSample(count, size, rate)
  534. }
  535. }
  536. value = append(value, count, size)
  537. return nil
  538. }
  539. if includeAlloc {
  540. if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
  541. return nil, 0, nil, err
  542. }
  543. }
  544. if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
  545. return nil, 0, nil, err
  546. }
  547. addrs = parseHexAddresses(sampleData[5])
  548. return value, blocksize, addrs, nil
  549. }
  550. // extractHexAddresses extracts hex numbers from a string and returns
  551. // them, together with their numeric value, in a slice.
  552. func extractHexAddresses(s string) ([]string, []uint64) {
  553. hexStrings := hexNumberRE.FindAllString(s, -1)
  554. var ids []uint64
  555. for _, s := range hexStrings {
  556. if id, err := strconv.ParseUint(s, 0, 64); err == nil {
  557. ids = append(ids, id)
  558. } else {
  559. // Do not expect any parsing failures due to the regexp matching.
  560. panic("failed to parse hex value:" + s)
  561. }
  562. }
  563. return hexStrings, ids
  564. }
  565. // parseHexAddresses parses hex numbers from a string and returns them
  566. // in a slice.
  567. func parseHexAddresses(s string) []uint64 {
  568. _, ids := extractHexAddresses(s)
  569. return ids
  570. }
  571. // scaleHeapSample adjusts the data from a heapz Sample to
  572. // account for its probability of appearing in the collected
  573. // data. heapz profiles are a sampling of the memory allocations
  574. // requests in a program. We estimate the unsampled value by dividing
  575. // each collected sample by its probability of appearing in the
  576. // profile. heapz v2 profiles rely on a poisson process to determine
  577. // which samples to collect, based on the desired average collection
  578. // rate R. The probability of a sample of size S to appear in that
  579. // profile is 1-exp(-S/R).
  580. func scaleHeapSample(count, size, rate int64) (int64, int64) {
  581. if count == 0 || size == 0 {
  582. return 0, 0
  583. }
  584. if rate <= 1 {
  585. // if rate==1 all samples were collected so no adjustment is needed.
  586. // if rate<1 treat as unknown and skip scaling.
  587. return count, size
  588. }
  589. avgSize := float64(size) / float64(count)
  590. scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
  591. return int64(float64(count) * scale), int64(float64(size) * scale)
  592. }
  593. // parseContention parses a contentionz profile and returns a newly
  594. // populated Profile.
  595. func parseContention(b []byte) (p *Profile, err error) {
  596. s := bufio.NewScanner(bytes.NewBuffer(b))
  597. if !s.Scan() {
  598. if err := s.Err(); err != nil {
  599. return nil, err
  600. }
  601. return nil, errUnrecognized
  602. }
  603. line := s.Text()
  604. if !strings.HasPrefix(line, "--- contention") {
  605. return nil, errUnrecognized
  606. }
  607. p = &Profile{
  608. PeriodType: &ValueType{Type: "contentions", Unit: "count"},
  609. Period: 1,
  610. SampleType: []*ValueType{
  611. {Type: "contentions", Unit: "count"},
  612. {Type: "delay", Unit: "nanoseconds"},
  613. },
  614. }
  615. var cpuHz int64
  616. // Parse text of the form "attribute = value" before the samples.
  617. const delimiter = "="
  618. for s.Scan() {
  619. line := s.Text()
  620. if line = strings.TrimSpace(line); line == "" {
  621. continue
  622. }
  623. if strings.HasPrefix(line, "---") {
  624. break
  625. }
  626. attr := strings.SplitN(line, delimiter, 2)
  627. if len(attr) != 2 {
  628. break
  629. }
  630. key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
  631. var err error
  632. switch key {
  633. case "cycles/second":
  634. if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
  635. return nil, errUnrecognized
  636. }
  637. case "sampling period":
  638. if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
  639. return nil, errUnrecognized
  640. }
  641. case "ms since reset":
  642. ms, err := strconv.ParseInt(val, 0, 64)
  643. if err != nil {
  644. return nil, errUnrecognized
  645. }
  646. p.DurationNanos = ms * 1000 * 1000
  647. case "format":
  648. // CPP contentionz profiles don't have format.
  649. return nil, errUnrecognized
  650. case "resolution":
  651. // CPP contentionz profiles don't have resolution.
  652. return nil, errUnrecognized
  653. case "discarded samples":
  654. default:
  655. return nil, errUnrecognized
  656. }
  657. }
  658. if err := s.Err(); err != nil {
  659. return nil, err
  660. }
  661. locs := make(map[uint64]*Location)
  662. for {
  663. line := strings.TrimSpace(s.Text())
  664. if strings.HasPrefix(line, "---") {
  665. break
  666. }
  667. value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
  668. if err != nil {
  669. return nil, err
  670. }
  671. var sloc []*Location
  672. for _, addr := range addrs {
  673. // Addresses from stack traces point to the next instruction after
  674. // each call. Adjust by -1 to land somewhere on the actual call.
  675. addr--
  676. loc := locs[addr]
  677. if locs[addr] == nil {
  678. loc = &Location{
  679. Address: addr,
  680. }
  681. p.Location = append(p.Location, loc)
  682. locs[addr] = loc
  683. }
  684. sloc = append(sloc, loc)
  685. }
  686. p.Sample = append(p.Sample, &Sample{
  687. Value: value,
  688. Location: sloc,
  689. })
  690. if !s.Scan() {
  691. break
  692. }
  693. }
  694. if err := s.Err(); err != nil {
  695. return nil, err
  696. }
  697. if err = parseAdditionalSections(s, p); err != nil {
  698. return nil, err
  699. }
  700. return p, nil
  701. }
  702. // parseContentionSample parses a single row from a contention profile
  703. // into a new Sample.
  704. func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
  705. sampleData := contentionSampleRE.FindStringSubmatch(line)
  706. if sampleData == nil {
  707. return value, addrs, errUnrecognized
  708. }
  709. v1, err := strconv.ParseInt(sampleData[1], 10, 64)
  710. if err != nil {
  711. return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
  712. }
  713. v2, err := strconv.ParseInt(sampleData[2], 10, 64)
  714. if err != nil {
  715. return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
  716. }
  717. // Unsample values if period and cpuHz are available.
  718. // - Delays are scaled to cycles and then to nanoseconds.
  719. // - Contentions are scaled to cycles.
  720. if period > 0 {
  721. if cpuHz > 0 {
  722. cpuGHz := float64(cpuHz) / 1e9
  723. v1 = int64(float64(v1) * float64(period) / cpuGHz)
  724. }
  725. v2 = v2 * period
  726. }
  727. value = []int64{v2, v1}
  728. addrs = parseHexAddresses(sampleData[3])
  729. return value, addrs, nil
  730. }
  731. // parseThread parses a Threadz profile and returns a new Profile.
  732. func parseThread(b []byte) (*Profile, error) {
  733. s := bufio.NewScanner(bytes.NewBuffer(b))
  734. // Skip past comments and empty lines seeking a real header.
  735. for s.Scan() && isSpaceOrComment(s.Text()) {
  736. }
  737. line := s.Text()
  738. if m := threadzStartRE.FindStringSubmatch(line); m != nil {
  739. // Advance over initial comments until first stack trace.
  740. for s.Scan() {
  741. if line = s.Text(); sectionTrigger(line) != unrecognizedSection || strings.HasPrefix(line, "-") {
  742. break
  743. }
  744. }
  745. } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
  746. return nil, errUnrecognized
  747. }
  748. p := &Profile{
  749. SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
  750. PeriodType: &ValueType{Type: "thread", Unit: "count"},
  751. Period: 1,
  752. }
  753. locs := make(map[uint64]*Location)
  754. // Recognize each thread and populate profile samples.
  755. for sectionTrigger(line) == unrecognizedSection {
  756. if strings.HasPrefix(line, "---- no stack trace for") {
  757. line = ""
  758. break
  759. }
  760. if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
  761. return nil, errUnrecognized
  762. }
  763. var addrs []uint64
  764. var err error
  765. line, addrs, err = parseThreadSample(s)
  766. if err != nil {
  767. return nil, errUnrecognized
  768. }
  769. if len(addrs) == 0 {
  770. // We got a --same as previous threads--. Bump counters.
  771. if len(p.Sample) > 0 {
  772. s := p.Sample[len(p.Sample)-1]
  773. s.Value[0]++
  774. }
  775. continue
  776. }
  777. var sloc []*Location
  778. for i, addr := range addrs {
  779. // Addresses from stack traces point to the next instruction after
  780. // each call. Adjust by -1 to land somewhere on the actual call
  781. // (except for the leaf, which is not a call).
  782. if i > 0 {
  783. addr--
  784. }
  785. loc := locs[addr]
  786. if locs[addr] == nil {
  787. loc = &Location{
  788. Address: addr,
  789. }
  790. p.Location = append(p.Location, loc)
  791. locs[addr] = loc
  792. }
  793. sloc = append(sloc, loc)
  794. }
  795. p.Sample = append(p.Sample, &Sample{
  796. Value: []int64{1},
  797. Location: sloc,
  798. })
  799. }
  800. if err := parseAdditionalSections(s, p); err != nil {
  801. return nil, err
  802. }
  803. cleanupDuplicateLocations(p)
  804. return p, nil
  805. }
  806. // parseThreadSample parses a symbolized or unsymbolized stack trace.
  807. // Returns the first line after the traceback, the sample (or nil if
  808. // it hits a 'same-as-previous' marker) and an error.
  809. func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
  810. var line string
  811. sameAsPrevious := false
  812. for s.Scan() {
  813. line = strings.TrimSpace(s.Text())
  814. if line == "" {
  815. continue
  816. }
  817. if strings.HasPrefix(line, "---") {
  818. break
  819. }
  820. if strings.Contains(line, "same as previous thread") {
  821. sameAsPrevious = true
  822. continue
  823. }
  824. addrs = append(addrs, parseHexAddresses(line)...)
  825. }
  826. if err := s.Err(); err != nil {
  827. return "", nil, err
  828. }
  829. if sameAsPrevious {
  830. return line, nil, nil
  831. }
  832. return line, addrs, nil
  833. }
  834. // parseAdditionalSections parses any additional sections in the
  835. // profile, ignoring any unrecognized sections.
  836. func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
  837. for sectionTrigger(s.Text()) != memoryMapSection && s.Scan() {
  838. }
  839. if err := s.Err(); err != nil {
  840. return err
  841. }
  842. return p.parseMemoryMapFromScanner(s)
  843. }
  844. // ParseProcMaps parses a memory map in the format of /proc/self/maps.
  845. // ParseMemoryMap should be called after setting on a profile to
  846. // associate locations to the corresponding mapping based on their
  847. // address.
  848. func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
  849. s := bufio.NewScanner(rd)
  850. return parseProcMapsFromScanner(s)
  851. }
  852. func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
  853. var mapping []*Mapping
  854. var attrs []string
  855. var r *strings.Replacer
  856. const delimiter = "="
  857. for s.Scan() {
  858. line := strings.TrimSpace(s.Text())
  859. if line == "" {
  860. continue
  861. }
  862. if r != nil {
  863. line = r.Replace(line)
  864. }
  865. m, err := parseMappingEntry(line)
  866. if err != nil {
  867. if err == errUnrecognized {
  868. // Recognize assignments of the form: attr=value, and replace
  869. // $attr with value on subsequent mappings.
  870. if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
  871. attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
  872. r = strings.NewReplacer(attrs...)
  873. }
  874. // Ignore any unrecognized entries
  875. continue
  876. }
  877. return nil, err
  878. }
  879. if m == nil {
  880. continue
  881. }
  882. mapping = append(mapping, m)
  883. }
  884. if err := s.Err(); err != nil {
  885. return nil, err
  886. }
  887. return mapping, nil
  888. }
  889. // ParseMemoryMap parses a memory map in the format of
  890. // /proc/self/maps, and overrides the mappings in the current profile.
  891. // It renumbers the samples and locations in the profile correspondingly.
  892. func (p *Profile) ParseMemoryMap(rd io.Reader) error {
  893. return p.parseMemoryMapFromScanner(bufio.NewScanner(rd))
  894. }
  895. func (p *Profile) parseMemoryMapFromScanner(s *bufio.Scanner) error {
  896. mapping, err := parseProcMapsFromScanner(s)
  897. if err != nil {
  898. return err
  899. }
  900. p.Mapping = append(p.Mapping, mapping...)
  901. p.massageMappings()
  902. p.remapLocationIDs()
  903. p.remapFunctionIDs()
  904. p.remapMappingIDs()
  905. return nil
  906. }
  907. func parseMappingEntry(l string) (*Mapping, error) {
  908. mapping := &Mapping{}
  909. var err error
  910. if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
  911. if !strings.Contains(me[3], "x") {
  912. // Skip non-executable entries.
  913. return nil, nil
  914. }
  915. if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  916. return nil, errUnrecognized
  917. }
  918. if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  919. return nil, errUnrecognized
  920. }
  921. if me[4] != "" {
  922. if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil {
  923. return nil, errUnrecognized
  924. }
  925. }
  926. mapping.File = me[8]
  927. return mapping, nil
  928. }
  929. if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
  930. if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
  931. return nil, errUnrecognized
  932. }
  933. if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
  934. return nil, errUnrecognized
  935. }
  936. mapping.File = me[3]
  937. if me[5] != "" {
  938. if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil {
  939. return nil, errUnrecognized
  940. }
  941. }
  942. return mapping, nil
  943. }
  944. return nil, errUnrecognized
  945. }
  946. type sectionType int
  947. const (
  948. unrecognizedSection sectionType = iota
  949. memoryMapSection
  950. )
  951. var memoryMapTriggers = []string{
  952. "--- Memory map: ---",
  953. "MAPPED_LIBRARIES:",
  954. }
  955. func sectionTrigger(line string) sectionType {
  956. for _, trigger := range memoryMapTriggers {
  957. if strings.Contains(line, trigger) {
  958. return memoryMapSection
  959. }
  960. }
  961. return unrecognizedSection
  962. }
  963. func (p *Profile) addLegacyFrameInfo() {
  964. switch {
  965. case isProfileType(p, heapzSampleTypes):
  966. p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  967. case isProfileType(p, contentionzSampleTypes):
  968. p.DropFrames, p.KeepFrames = lockRxStr, ""
  969. default:
  970. p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  971. }
  972. }
// heapzSampleTypes lists the sample-type name sequences that
// addLegacyFrameInfo treats as heap profiles. A profile matches when its
// sample types equal one of these sequences, in order (see isProfileType).
var heapzSampleTypes = [][]string{
	{"allocations", "size"}, // early Go pprof profiles
	{"objects", "space"},
	{"inuse_objects", "inuse_space"},
	{"alloc_objects", "alloc_space"},
}
// contentionzSampleTypes lists the sample-type name sequences that
// addLegacyFrameInfo treats as contention profiles (see isProfileType).
var contentionzSampleTypes = [][]string{
	{"contentions", "delay"},
}
  982. func isProfileType(p *Profile, types [][]string) bool {
  983. st := p.SampleType
  984. nextType:
  985. for _, t := range types {
  986. if len(st) != len(t) {
  987. continue
  988. }
  989. for i := range st {
  990. if st[i].Type != t[i] {
  991. continue nextType
  992. }
  993. }
  994. return true
  995. }
  996. return false
  997. }
// allocRxStr is a "|"-separated alternation of regular-expression
// fragments naming memory-allocation routines. addLegacyFrameInfo assigns
// it to DropFrames for heap profiles.
var allocRxStr = strings.Join([]string{
	// POSIX entry points.
	`calloc`,
	`cfree`,
	`malloc`,
	`free`,
	`memalign`,
	`do_memalign`,
	`(__)?posix_memalign`,
	`pvalloc`,
	`valloc`,
	`realloc`,

	// TC malloc.
	`tcmalloc::.*`,
	`tc_calloc`,
	`tc_cfree`,
	`tc_malloc`,
	`tc_free`,
	`tc_memalign`,
	`tc_posix_memalign`,
	`tc_pvalloc`,
	`tc_valloc`,
	`tc_realloc`,
	`tc_new`,
	`tc_delete`,
	`tc_newarray`,
	`tc_deletearray`,
	`tc_new_nothrow`,
	`tc_newarray_nothrow`,

	// Memory-allocation routines on OS X.
	`malloc_zone_malloc`,
	`malloc_zone_calloc`,
	`malloc_zone_valloc`,
	`malloc_zone_realloc`,
	`malloc_zone_memalign`,
	`malloc_zone_free`,

	// Go runtime
	`runtime\..*`,

	// Other misc. memory allocation routines
	`BaseArena::.*`,
	`(::)?do_malloc_no_errno`,
	`(::)?do_malloc_pages`,
	`(::)?do_malloc`,
	`DoSampledAllocation`,
	`MallocedMemBlock::MallocedMemBlock`,
	`_M_allocate`,
	`__builtin_(vec_)?delete`,
	`__builtin_(vec_)?new`,
	`__gnu_cxx::new_allocator::allocate`,
	`__libc_malloc`,
	`__malloc_alloc_template::allocate`,
	`allocate`,
	`cpp_alloc`,
	`operator new(\[\])?`,
	`simple_alloc::allocate`,
}, `|`)
// allocSkipRxStr is a "|"-separated alternation of regular-expression
// fragments. addLegacyFrameInfo assigns it to KeepFrames for heap
// profiles, alongside allocRxStr as DropFrames.
var allocSkipRxStr = strings.Join([]string{
	// Preserve Go runtime frames that appear in the middle/bottom of
	// the stack.
	`runtime\.panic`,
	// See https://github.com/google/pprof/issues/54.
	`runtime\.call32`,
	`runtime\.call64`,
}, `|`)
// cpuProfilerRxStr is a "|"-separated alternation of regular-expression
// fragments. addLegacyFrameInfo assigns it to DropFrames for any profile
// that is neither a heap nor a contention profile.
var cpuProfilerRxStr = strings.Join([]string{
	`ProfileData::Add`,
	`ProfileData::prof_handler`,
	`CpuProfiler::prof_handler`,
	`__pthread_sighandler`,
	`__restore`,
}, `|`)
// lockRxStr is a "|"-separated alternation of regular-expression
// fragments naming lock and mutex bookkeeping routines. addLegacyFrameInfo
// assigns it to DropFrames for contention profiles.
var lockRxStr = strings.Join([]string{
	`RecordLockProfileData`,
	`(base::)?RecordLockProfileData.*`,
	`(base::)?SubmitMutexProfileData.*`,
	`(base::)?SubmitSpinLockProfileData.*`,
	`(base::Mutex::)?AwaitCommon.*`,
	`(base::Mutex::)?Unlock.*`,
	`(base::Mutex::)?UnlockSlow.*`,
	`(base::Mutex::)?ReaderUnlock.*`,
	`(base::MutexLock::)?~MutexLock.*`,
	`(Mutex::)?AwaitCommon.*`,
	`(Mutex::)?Unlock.*`,
	`(Mutex::)?UnlockSlow.*`,
	`(Mutex::)?ReaderUnlock.*`,
	`(MutexLock::)?~MutexLock.*`,
	`(SpinLock::)?Unlock.*`,
	`(SpinLock::)?SlowUnlock.*`,
	`(SpinLockHolder::)?~SpinLockHolder.*`,
}, `|`)