説明なし

legacy_profile.go 33KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220
  1. // Copyright 2014 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // This file implements parsers to convert legacy profiles into the
  15. // profile.proto format.
  16. package profile
  17. import (
  18. "bufio"
  19. "bytes"
  20. "fmt"
  21. "io"
  22. "math"
  23. "regexp"
  24. "strconv"
  25. "strings"
  26. )
  27. var (
  28. countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
  29. countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)
  30. heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
  31. heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
  32. contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
  33. hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
  34. growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)
  35. fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)
  36. threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
  37. threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
  38. // Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
  39. // Recommended format:
  40. // Start End object file name offset(optional) linker build id
  41. // 0x40000-0x80000 /path/to/binary (@FF00) abc123456
  42. spaceDigits = `\s+[[:digit:]]+`
  43. hexPair = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
  44. oSpace = `\s*`
  45. // Capturing expressions.
  46. cHex = `(?:0x)?([[:xdigit:]]+)`
  47. cHexRange = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
  48. cSpaceString = `(?:\s+(\S+))?`
  49. cSpaceHex = `(?:\s+([[:xdigit:]]+))?`
  50. cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
  51. cPerm = `(?:\s+([-rwxp]+))?`
  52. procMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
  53. briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)
  54. // Regular expression to parse log data, of the form:
  55. // ... file:line] msg...
  56. logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
  57. )
  58. func isSpaceOrComment(line string) bool {
  59. trimmed := strings.TrimSpace(line)
  60. return len(trimmed) == 0 || trimmed[0] == '#'
  61. }
  62. // parseGoCount parses a Go count profile (e.g., threadcreate or
  63. // goroutine) and returns a new Profile.
  64. func parseGoCount(b []byte) (*Profile, error) {
  65. s := bufio.NewScanner(bytes.NewBuffer(b))
  66. // Skip comments at the beginning of the file.
  67. for s.Scan() && isSpaceOrComment(s.Text()) {
  68. }
  69. if err := s.Err(); err != nil {
  70. return nil, err
  71. }
  72. m := countStartRE.FindStringSubmatch(s.Text())
  73. if m == nil {
  74. return nil, errUnrecognized
  75. }
  76. profileType := m[1]
  77. p := &Profile{
  78. PeriodType: &ValueType{Type: profileType, Unit: "count"},
  79. Period: 1,
  80. SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
  81. }
  82. locations := make(map[uint64]*Location)
  83. for s.Scan() {
  84. line := s.Text()
  85. if isSpaceOrComment(line) {
  86. continue
  87. }
  88. if strings.HasPrefix(line, "---") {
  89. break
  90. }
  91. m := countRE.FindStringSubmatch(line)
  92. if m == nil {
  93. return nil, errMalformed
  94. }
  95. n, err := strconv.ParseInt(m[1], 0, 64)
  96. if err != nil {
  97. return nil, errMalformed
  98. }
  99. fields := strings.Fields(m[2])
  100. locs := make([]*Location, 0, len(fields))
  101. for _, stk := range fields {
  102. addr, err := strconv.ParseUint(stk, 0, 64)
  103. if err != nil {
  104. return nil, errMalformed
  105. }
  106. // Adjust all frames by -1 to land on top of the call instruction.
  107. addr--
  108. loc := locations[addr]
  109. if loc == nil {
  110. loc = &Location{
  111. Address: addr,
  112. }
  113. locations[addr] = loc
  114. p.Location = append(p.Location, loc)
  115. }
  116. locs = append(locs, loc)
  117. }
  118. p.Sample = append(p.Sample, &Sample{
  119. Location: locs,
  120. Value: []int64{n},
  121. })
  122. }
  123. if err := s.Err(); err != nil {
  124. return nil, err
  125. }
  126. if err := parseAdditionalSections(s, p); err != nil {
  127. return nil, err
  128. }
  129. return p, nil
  130. }
  131. // remapLocationIDs ensures there is a location for each address
  132. // referenced by a sample, and remaps the samples to point to the new
  133. // location ids.
  134. func (p *Profile) remapLocationIDs() {
  135. seen := make(map[*Location]bool, len(p.Location))
  136. var locs []*Location
  137. for _, s := range p.Sample {
  138. for _, l := range s.Location {
  139. if seen[l] {
  140. continue
  141. }
  142. l.ID = uint64(len(locs) + 1)
  143. locs = append(locs, l)
  144. seen[l] = true
  145. }
  146. }
  147. p.Location = locs
  148. }
  149. func (p *Profile) remapFunctionIDs() {
  150. seen := make(map[*Function]bool, len(p.Function))
  151. var fns []*Function
  152. for _, l := range p.Location {
  153. for _, ln := range l.Line {
  154. fn := ln.Function
  155. if fn == nil || seen[fn] {
  156. continue
  157. }
  158. fn.ID = uint64(len(fns) + 1)
  159. fns = append(fns, fn)
  160. seen[fn] = true
  161. }
  162. }
  163. p.Function = fns
  164. }
  165. // remapMappingIDs matches location addresses with existing mappings
  166. // and updates them appropriately. This is O(N*M), if this ever shows
  167. // up as a bottleneck, evaluate sorting the mappings and doing a
  168. // binary search, which would make it O(N*log(M)).
  169. func (p *Profile) remapMappingIDs() {
  170. // Some profile handlers will incorrectly set regions for the main
  171. // executable if its section is remapped. Fix them through heuristics.
  172. if len(p.Mapping) > 0 {
  173. // Remove the initial mapping if named '/anon_hugepage' and has a
  174. // consecutive adjacent mapping.
  175. if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
  176. if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
  177. p.Mapping = p.Mapping[1:]
  178. }
  179. }
  180. }
  181. // Subtract the offset from the start of the main mapping if it
  182. // ends up at a recognizable start address.
  183. if len(p.Mapping) > 0 {
  184. const expectedStart = 0x400000
  185. if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
  186. m.Start = expectedStart
  187. m.Offset = 0
  188. }
  189. }
  190. // Associate each location with an address to the corresponding
  191. // mapping. Create fake mapping if a suitable one isn't found.
  192. var fake *Mapping
  193. nextLocation:
  194. for _, l := range p.Location {
  195. a := l.Address
  196. if l.Mapping != nil || a == 0 {
  197. continue
  198. }
  199. for _, m := range p.Mapping {
  200. if m.Start <= a && a < m.Limit {
  201. l.Mapping = m
  202. continue nextLocation
  203. }
  204. }
  205. // Work around legacy handlers failing to encode the first
  206. // part of mappings split into adjacent ranges.
  207. for _, m := range p.Mapping {
  208. if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
  209. m.Start -= m.Offset
  210. m.Offset = 0
  211. l.Mapping = m
  212. continue nextLocation
  213. }
  214. }
  215. // If there is still no mapping, create a fake one.
  216. // This is important for the Go legacy handler, which produced
  217. // no mappings.
  218. if fake == nil {
  219. fake = &Mapping{
  220. ID: 1,
  221. Limit: ^uint64(0),
  222. }
  223. p.Mapping = append(p.Mapping, fake)
  224. }
  225. l.Mapping = fake
  226. }
  227. // Reset all mapping IDs.
  228. for i, m := range p.Mapping {
  229. m.ID = uint64(i + 1)
  230. }
  231. }
  232. var cpuInts = []func([]byte) (uint64, []byte){
  233. get32l,
  234. get32b,
  235. get64l,
  236. get64b,
  237. }
  238. func get32l(b []byte) (uint64, []byte) {
  239. if len(b) < 4 {
  240. return 0, nil
  241. }
  242. return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
  243. }
  244. func get32b(b []byte) (uint64, []byte) {
  245. if len(b) < 4 {
  246. return 0, nil
  247. }
  248. return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
  249. }
  250. func get64l(b []byte) (uint64, []byte) {
  251. if len(b) < 8 {
  252. return 0, nil
  253. }
  254. return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
  255. }
  256. func get64b(b []byte) (uint64, []byte) {
  257. if len(b) < 8 {
  258. return 0, nil
  259. }
  260. return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
  261. }
  262. // parseCPU parses a profilez legacy profile and returns a newly
  263. // populated Profile.
  264. //
  265. // The general format for profilez samples is a sequence of words in
  266. // binary format. The first words are a header with the following data:
  267. // 1st word -- 0
  268. // 2nd word -- 3
  269. // 3rd word -- 0 if a c++ application, 1 if a java application.
  270. // 4th word -- Sampling period (in microseconds).
  271. // 5th word -- Padding.
  272. func parseCPU(b []byte) (*Profile, error) {
  273. var parse func([]byte) (uint64, []byte)
  274. var n1, n2, n3, n4, n5 uint64
  275. for _, parse = range cpuInts {
  276. var tmp []byte
  277. n1, tmp = parse(b)
  278. n2, tmp = parse(tmp)
  279. n3, tmp = parse(tmp)
  280. n4, tmp = parse(tmp)
  281. n5, tmp = parse(tmp)
  282. if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
  283. b = tmp
  284. return cpuProfile(b, int64(n4), parse)
  285. }
  286. if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
  287. b = tmp
  288. return javaCPUProfile(b, int64(n4), parse)
  289. }
  290. }
  291. return nil, errUnrecognized
  292. }
  293. // cpuProfile returns a new Profile from C++ profilez data.
  294. // b is the profile bytes after the header, period is the profiling
  295. // period, and parse is a function to parse 8-byte chunks from the
  296. // profile in its native endianness.
  297. func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
  298. p := &Profile{
  299. Period: period * 1000,
  300. PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
  301. SampleType: []*ValueType{
  302. {Type: "samples", Unit: "count"},
  303. {Type: "cpu", Unit: "nanoseconds"},
  304. },
  305. }
  306. var err error
  307. if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
  308. return nil, err
  309. }
  310. // If *most* samples have the same second-to-the-bottom frame, it
  311. // strongly suggests that it is an uninteresting artifact of
  312. // measurement -- a stack frame pushed by the signal handler. The
  313. // bottom frame is always correct as it is picked up from the signal
  314. // structure, not the stack. Check if this is the case and if so,
  315. // remove.
  316. // Remove up to two frames.
  317. maxiter := 2
  318. // Allow one different sample for this many samples with the same
  319. // second-to-last frame.
  320. similarSamples := 32
  321. margin := len(p.Sample) / similarSamples
  322. for iter := 0; iter < maxiter; iter++ {
  323. addr1 := make(map[uint64]int)
  324. for _, s := range p.Sample {
  325. if len(s.Location) > 1 {
  326. a := s.Location[1].Address
  327. addr1[a] = addr1[a] + 1
  328. }
  329. }
  330. for id1, count := range addr1 {
  331. if count >= len(p.Sample)-margin {
  332. // Found uninteresting frame, strip it out from all samples
  333. for _, s := range p.Sample {
  334. if len(s.Location) > 1 && s.Location[1].Address == id1 {
  335. s.Location = append(s.Location[:1], s.Location[2:]...)
  336. }
  337. }
  338. break
  339. }
  340. }
  341. }
  342. if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
  343. return nil, err
  344. }
  345. cleanupDuplicateLocations(p)
  346. return p, nil
  347. }
  348. func cleanupDuplicateLocations(p *Profile) {
  349. // The profile handler may duplicate the leaf frame, because it gets
  350. // its address both from stack unwinding and from the signal
  351. // context. Detect this and delete the duplicate, which has been
  352. // adjusted by -1. The leaf address should not be adjusted as it is
  353. // not a call.
  354. for _, s := range p.Sample {
  355. if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
  356. s.Location = append(s.Location[:1], s.Location[2:]...)
  357. }
  358. }
  359. }
  360. // parseCPUSamples parses a collection of profilez samples from a
  361. // profile.
  362. //
  363. // profilez samples are a repeated sequence of stack frames of the
  364. // form:
  365. // 1st word -- The number of times this stack was encountered.
  366. // 2nd word -- The size of the stack (StackSize).
  367. // 3rd word -- The first address on the stack.
  368. // ...
  369. // StackSize + 2 -- The last address on the stack
  370. // The last stack trace is of the form:
  371. // 1st word -- 0
  372. // 2nd word -- 1
  373. // 3rd word -- 0
  374. //
  375. // Addresses from stack traces may point to the next instruction after
  376. // each call. Optionally adjust by -1 to land somewhere on the actual
  377. // call (except for the leaf, which is not a call).
  378. func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
  379. locs := make(map[uint64]*Location)
  380. for len(b) > 0 {
  381. var count, nstk uint64
  382. count, b = parse(b)
  383. nstk, b = parse(b)
  384. if b == nil || nstk > uint64(len(b)/4) {
  385. return nil, nil, errUnrecognized
  386. }
  387. var sloc []*Location
  388. addrs := make([]uint64, nstk)
  389. for i := 0; i < int(nstk); i++ {
  390. addrs[i], b = parse(b)
  391. }
  392. if count == 0 && nstk == 1 && addrs[0] == 0 {
  393. // End of data marker
  394. break
  395. }
  396. for i, addr := range addrs {
  397. if adjust && i > 0 {
  398. addr--
  399. }
  400. loc := locs[addr]
  401. if loc == nil {
  402. loc = &Location{
  403. Address: addr,
  404. }
  405. locs[addr] = loc
  406. p.Location = append(p.Location, loc)
  407. }
  408. sloc = append(sloc, loc)
  409. }
  410. p.Sample = append(p.Sample,
  411. &Sample{
  412. Value: []int64{int64(count), int64(count) * p.Period},
  413. Location: sloc,
  414. })
  415. }
  416. // Reached the end without finding the EOD marker.
  417. return b, locs, nil
  418. }
  419. // parseHeap parses a heapz legacy or a growthz profile and
  420. // returns a newly populated Profile.
  421. func parseHeap(b []byte) (p *Profile, err error) {
  422. s := bufio.NewScanner(bytes.NewBuffer(b))
  423. if !s.Scan() {
  424. if err := s.Err(); err != nil {
  425. return nil, err
  426. }
  427. return nil, errUnrecognized
  428. }
  429. p = &Profile{}
  430. sampling := ""
  431. hasAlloc := false
  432. line := s.Text()
  433. p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
  434. if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
  435. sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
  436. if err != nil {
  437. return nil, err
  438. }
  439. } else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
  440. p.Period = 1
  441. } else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
  442. p.Period = 1
  443. } else {
  444. return nil, errUnrecognized
  445. }
  446. if hasAlloc {
  447. // Put alloc before inuse so that default pprof selection
  448. // will prefer inuse_space.
  449. p.SampleType = []*ValueType{
  450. {Type: "alloc_objects", Unit: "count"},
  451. {Type: "alloc_space", Unit: "bytes"},
  452. {Type: "inuse_objects", Unit: "count"},
  453. {Type: "inuse_space", Unit: "bytes"},
  454. }
  455. } else {
  456. p.SampleType = []*ValueType{
  457. {Type: "objects", Unit: "count"},
  458. {Type: "space", Unit: "bytes"},
  459. }
  460. }
  461. locs := make(map[uint64]*Location)
  462. for s.Scan() {
  463. line := strings.TrimSpace(s.Text())
  464. if isSpaceOrComment(line) {
  465. continue
  466. }
  467. if isMemoryMapSentinel(line) {
  468. break
  469. }
  470. value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
  471. if err != nil {
  472. return nil, err
  473. }
  474. var sloc []*Location
  475. for _, addr := range addrs {
  476. // Addresses from stack traces point to the next instruction after
  477. // each call. Adjust by -1 to land somewhere on the actual call.
  478. addr--
  479. loc := locs[addr]
  480. if locs[addr] == nil {
  481. loc = &Location{
  482. Address: addr,
  483. }
  484. p.Location = append(p.Location, loc)
  485. locs[addr] = loc
  486. }
  487. sloc = append(sloc, loc)
  488. }
  489. p.Sample = append(p.Sample, &Sample{
  490. Value: value,
  491. Location: sloc,
  492. NumLabel: map[string][]int64{"bytes": {blocksize}},
  493. })
  494. }
  495. if err := s.Err(); err != nil {
  496. return nil, err
  497. }
  498. if err := parseAdditionalSections(s, p); err != nil {
  499. return nil, err
  500. }
  501. return p, nil
  502. }
  503. func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
  504. header := heapHeaderRE.FindStringSubmatch(line)
  505. if header == nil {
  506. return "", 0, false, errUnrecognized
  507. }
  508. if len(header[6]) > 0 {
  509. if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
  510. return "", 0, false, errUnrecognized
  511. }
  512. }
  513. if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
  514. hasAlloc = true
  515. }
  516. switch header[5] {
  517. case "heapz_v2", "heap_v2":
  518. return "v2", period, hasAlloc, nil
  519. case "heapprofile":
  520. return "", 1, hasAlloc, nil
  521. case "heap":
  522. return "v2", period / 2, hasAlloc, nil
  523. default:
  524. return "", 0, false, errUnrecognized
  525. }
  526. }
  527. // parseHeapSample parses a single row from a heap profile into a new Sample.
  528. func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
  529. sampleData := heapSampleRE.FindStringSubmatch(line)
  530. if len(sampleData) != 6 {
  531. return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
  532. }
  533. // This is a local-scoped helper function to avoid needing to pass
  534. // around rate, sampling and many return parameters.
  535. addValues := func(countString, sizeString string, label string) error {
  536. count, err := strconv.ParseInt(countString, 10, 64)
  537. if err != nil {
  538. return fmt.Errorf("malformed sample: %s: %v", line, err)
  539. }
  540. size, err := strconv.ParseInt(sizeString, 10, 64)
  541. if err != nil {
  542. return fmt.Errorf("malformed sample: %s: %v", line, err)
  543. }
  544. if count == 0 && size != 0 {
  545. return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
  546. }
  547. if count != 0 {
  548. blocksize = size / count
  549. if sampling == "v2" {
  550. count, size = scaleHeapSample(count, size, rate)
  551. }
  552. }
  553. value = append(value, count, size)
  554. return nil
  555. }
  556. if includeAlloc {
  557. if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
  558. return nil, 0, nil, err
  559. }
  560. }
  561. if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
  562. return nil, 0, nil, err
  563. }
  564. addrs = parseHexAddresses(sampleData[5])
  565. return value, blocksize, addrs, nil
  566. }
  567. // extractHexAddresses extracts hex numbers from a string and returns
  568. // them, together with their numeric value, in a slice.
  569. func extractHexAddresses(s string) ([]string, []uint64) {
  570. hexStrings := hexNumberRE.FindAllString(s, -1)
  571. var ids []uint64
  572. for _, s := range hexStrings {
  573. if id, err := strconv.ParseUint(s, 0, 64); err == nil {
  574. ids = append(ids, id)
  575. } else {
  576. // Do not expect any parsing failures due to the regexp matching.
  577. panic("failed to parse hex value:" + s)
  578. }
  579. }
  580. return hexStrings, ids
  581. }
  582. // parseHexAddresses parses hex numbers from a string and returns them
  583. // in a slice.
  584. func parseHexAddresses(s string) []uint64 {
  585. _, ids := extractHexAddresses(s)
  586. return ids
  587. }
  588. // scaleHeapSample adjusts the data from a heapz Sample to
  589. // account for its probability of appearing in the collected
  590. // data. heapz profiles are a sampling of the memory allocations
  591. // requests in a program. We estimate the unsampled value by dividing
  592. // each collected sample by its probability of appearing in the
  593. // profile. heapz v2 profiles rely on a poisson process to determine
  594. // which samples to collect, based on the desired average collection
  595. // rate R. The probability of a sample of size S to appear in that
  596. // profile is 1-exp(-S/R).
  597. func scaleHeapSample(count, size, rate int64) (int64, int64) {
  598. if count == 0 || size == 0 {
  599. return 0, 0
  600. }
  601. if rate <= 1 {
  602. // if rate==1 all samples were collected so no adjustment is needed.
  603. // if rate<1 treat as unknown and skip scaling.
  604. return count, size
  605. }
  606. avgSize := float64(size) / float64(count)
  607. scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
  608. return int64(float64(count) * scale), int64(float64(size) * scale)
  609. }
  610. // parseContention parses a mutex or contention profile. There are 2 cases:
  611. // "--- contentionz " for legacy C++ profiles (and backwards compatibility)
  612. // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
  613. func parseContention(b []byte) (*Profile, error) {
  614. s := bufio.NewScanner(bytes.NewBuffer(b))
  615. if !s.Scan() {
  616. if err := s.Err(); err != nil {
  617. return nil, err
  618. }
  619. return nil, errUnrecognized
  620. }
  621. switch l := s.Text(); {
  622. case strings.HasPrefix(l, "--- contentionz "):
  623. case strings.HasPrefix(l, "--- mutex:"):
  624. case strings.HasPrefix(l, "--- contention:"):
  625. default:
  626. return nil, errUnrecognized
  627. }
  628. p := &Profile{
  629. PeriodType: &ValueType{Type: "contentions", Unit: "count"},
  630. Period: 1,
  631. SampleType: []*ValueType{
  632. {Type: "contentions", Unit: "count"},
  633. {Type: "delay", Unit: "nanoseconds"},
  634. },
  635. }
  636. var cpuHz int64
  637. // Parse text of the form "attribute = value" before the samples.
  638. const delimiter = "="
  639. for s.Scan() {
  640. line := s.Text()
  641. if line = strings.TrimSpace(line); isSpaceOrComment(line) {
  642. continue
  643. }
  644. if strings.HasPrefix(line, "---") {
  645. break
  646. }
  647. attr := strings.SplitN(line, delimiter, 2)
  648. if len(attr) != 2 {
  649. break
  650. }
  651. key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
  652. var err error
  653. switch key {
  654. case "cycles/second":
  655. if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
  656. return nil, errUnrecognized
  657. }
  658. case "sampling period":
  659. if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
  660. return nil, errUnrecognized
  661. }
  662. case "ms since reset":
  663. ms, err := strconv.ParseInt(val, 0, 64)
  664. if err != nil {
  665. return nil, errUnrecognized
  666. }
  667. p.DurationNanos = ms * 1000 * 1000
  668. case "format":
  669. // CPP contentionz profiles don't have format.
  670. return nil, errUnrecognized
  671. case "resolution":
  672. // CPP contentionz profiles don't have resolution.
  673. return nil, errUnrecognized
  674. case "discarded samples":
  675. default:
  676. return nil, errUnrecognized
  677. }
  678. }
  679. if err := s.Err(); err != nil {
  680. return nil, err
  681. }
  682. locs := make(map[uint64]*Location)
  683. for {
  684. line := strings.TrimSpace(s.Text())
  685. if strings.HasPrefix(line, "---") {
  686. break
  687. }
  688. if !isSpaceOrComment(line) {
  689. value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
  690. if err != nil {
  691. return nil, err
  692. }
  693. var sloc []*Location
  694. for _, addr := range addrs {
  695. // Addresses from stack traces point to the next instruction after
  696. // each call. Adjust by -1 to land somewhere on the actual call.
  697. addr--
  698. loc := locs[addr]
  699. if locs[addr] == nil {
  700. loc = &Location{
  701. Address: addr,
  702. }
  703. p.Location = append(p.Location, loc)
  704. locs[addr] = loc
  705. }
  706. sloc = append(sloc, loc)
  707. }
  708. p.Sample = append(p.Sample, &Sample{
  709. Value: value,
  710. Location: sloc,
  711. })
  712. }
  713. if !s.Scan() {
  714. break
  715. }
  716. }
  717. if err := s.Err(); err != nil {
  718. return nil, err
  719. }
  720. if err := parseAdditionalSections(s, p); err != nil {
  721. return nil, err
  722. }
  723. return p, nil
  724. }
  725. // parseContentionSample parses a single row from a contention profile
  726. // into a new Sample.
  727. func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
  728. sampleData := contentionSampleRE.FindStringSubmatch(line)
  729. if sampleData == nil {
  730. return value, addrs, errUnrecognized
  731. }
  732. v1, err := strconv.ParseInt(sampleData[1], 10, 64)
  733. if err != nil {
  734. return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
  735. }
  736. v2, err := strconv.ParseInt(sampleData[2], 10, 64)
  737. if err != nil {
  738. return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
  739. }
  740. // Unsample values if period and cpuHz are available.
  741. // - Delays are scaled to cycles and then to nanoseconds.
  742. // - Contentions are scaled to cycles.
  743. if period > 0 {
  744. if cpuHz > 0 {
  745. cpuGHz := float64(cpuHz) / 1e9
  746. v1 = int64(float64(v1) * float64(period) / cpuGHz)
  747. }
  748. v2 = v2 * period
  749. }
  750. value = []int64{v2, v1}
  751. addrs = parseHexAddresses(sampleData[3])
  752. return value, addrs, nil
  753. }
  754. // parseThread parses a Threadz profile and returns a new Profile.
  755. func parseThread(b []byte) (*Profile, error) {
  756. s := bufio.NewScanner(bytes.NewBuffer(b))
  757. // Skip past comments and empty lines seeking a real header.
  758. for s.Scan() && isSpaceOrComment(s.Text()) {
  759. }
  760. line := s.Text()
  761. if m := threadzStartRE.FindStringSubmatch(line); m != nil {
  762. // Advance over initial comments until first stack trace.
  763. for s.Scan() {
  764. if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
  765. break
  766. }
  767. }
  768. } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
  769. return nil, errUnrecognized
  770. }
  771. p := &Profile{
  772. SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
  773. PeriodType: &ValueType{Type: "thread", Unit: "count"},
  774. Period: 1,
  775. }
  776. locs := make(map[uint64]*Location)
  777. // Recognize each thread and populate profile samples.
  778. for !isMemoryMapSentinel(line) {
  779. if strings.HasPrefix(line, "---- no stack trace for") {
  780. line = ""
  781. break
  782. }
  783. if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
  784. return nil, errUnrecognized
  785. }
  786. var addrs []uint64
  787. var err error
  788. line, addrs, err = parseThreadSample(s)
  789. if err != nil {
  790. return nil, errUnrecognized
  791. }
  792. if len(addrs) == 0 {
  793. // We got a --same as previous threads--. Bump counters.
  794. if len(p.Sample) > 0 {
  795. s := p.Sample[len(p.Sample)-1]
  796. s.Value[0]++
  797. }
  798. continue
  799. }
  800. var sloc []*Location
  801. for i, addr := range addrs {
  802. // Addresses from stack traces point to the next instruction after
  803. // each call. Adjust by -1 to land somewhere on the actual call
  804. // (except for the leaf, which is not a call).
  805. if i > 0 {
  806. addr--
  807. }
  808. loc := locs[addr]
  809. if locs[addr] == nil {
  810. loc = &Location{
  811. Address: addr,
  812. }
  813. p.Location = append(p.Location, loc)
  814. locs[addr] = loc
  815. }
  816. sloc = append(sloc, loc)
  817. }
  818. p.Sample = append(p.Sample, &Sample{
  819. Value: []int64{1},
  820. Location: sloc,
  821. })
  822. }
  823. if err := parseAdditionalSections(s, p); err != nil {
  824. return nil, err
  825. }
  826. cleanupDuplicateLocations(p)
  827. return p, nil
  828. }
  829. // parseThreadSample parses a symbolized or unsymbolized stack trace.
  830. // Returns the first line after the traceback, the sample (or nil if
  831. // it hits a 'same-as-previous' marker) and an error.
  832. func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
  833. var line string
  834. sameAsPrevious := false
  835. for s.Scan() {
  836. line = strings.TrimSpace(s.Text())
  837. if line == "" {
  838. continue
  839. }
  840. if strings.HasPrefix(line, "---") {
  841. break
  842. }
  843. if strings.Contains(line, "same as previous thread") {
  844. sameAsPrevious = true
  845. continue
  846. }
  847. addrs = append(addrs, parseHexAddresses(line)...)
  848. }
  849. if err := s.Err(); err != nil {
  850. return "", nil, err
  851. }
  852. if sameAsPrevious {
  853. return line, nil, nil
  854. }
  855. return line, addrs, nil
  856. }
  857. // parseAdditionalSections parses any additional sections in the
  858. // profile, ignoring any unrecognized sections.
  859. func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
  860. for !isMemoryMapSentinel(s.Text()) && s.Scan() {
  861. }
  862. if err := s.Err(); err != nil {
  863. return err
  864. }
  865. return p.ParseMemoryMapFromScanner(s)
  866. }
  867. // ParseProcMaps parses a memory map in the format of /proc/self/maps.
  868. // ParseMemoryMap should be called after setting on a profile to
  869. // associate locations to the corresponding mapping based on their
  870. // address.
  871. func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
  872. s := bufio.NewScanner(rd)
  873. return parseProcMapsFromScanner(s)
  874. }
  875. func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
  876. var mapping []*Mapping
  877. var attrs []string
  878. const delimiter = "="
  879. r := strings.NewReplacer()
  880. for s.Scan() {
  881. line := r.Replace(removeLoggingInfo(s.Text()))
  882. m, err := parseMappingEntry(line)
  883. if err != nil {
  884. if err == errUnrecognized {
  885. // Recognize assignments of the form: attr=value, and replace
  886. // $attr with value on subsequent mappings.
  887. if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
  888. attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
  889. r = strings.NewReplacer(attrs...)
  890. }
  891. // Ignore any unrecognized entries
  892. continue
  893. }
  894. return nil, err
  895. }
  896. if m == nil {
  897. continue
  898. }
  899. mapping = append(mapping, m)
  900. }
  901. if err := s.Err(); err != nil {
  902. return nil, err
  903. }
  904. return mapping, nil
  905. }
  906. // removeLoggingInfo detects and removes log prefix entries generated
  907. // by the glog package. If no logging prefix is detected, the string
  908. // is returned unmodified.
  909. func removeLoggingInfo(line string) string {
  910. if match := logInfoRE.FindStringIndex(line); match != nil {
  911. return line[match[1]:]
  912. }
  913. return line
  914. }
  915. // ParseMemoryMap parses a memory map in the format of
  916. // /proc/self/maps, and overrides the mappings in the current profile.
  917. // It renumbers the samples and locations in the profile correspondingly.
  918. func (p *Profile) ParseMemoryMap(rd io.Reader) error {
  919. return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
  920. }
  921. // ParseMemoryMapFromScanner parses a memory map in the format of
  922. // /proc/self/maps or a variety of legacy format, and overrides the
  923. // mappings in the current profile. It renumbers the samples and
  924. // locations in the profile correspondingly.
  925. func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
  926. mapping, err := parseProcMapsFromScanner(s)
  927. if err != nil {
  928. return err
  929. }
  930. p.Mapping = append(p.Mapping, mapping...)
  931. p.massageMappings()
  932. p.remapLocationIDs()
  933. p.remapFunctionIDs()
  934. p.remapMappingIDs()
  935. return nil
  936. }
  937. func parseMappingEntry(l string) (*Mapping, error) {
  938. var start, end, perm, file, offset, buildID string
  939. if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
  940. start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
  941. } else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
  942. start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
  943. } else {
  944. return nil, errUnrecognized
  945. }
  946. var err error
  947. mapping := &Mapping{
  948. File: file,
  949. BuildID: buildID,
  950. }
  951. if perm != "" && !strings.Contains(perm, "x") {
  952. // Skip non-executable entries.
  953. return nil, nil
  954. }
  955. if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
  956. return nil, errUnrecognized
  957. }
  958. if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
  959. return nil, errUnrecognized
  960. }
  961. if offset != "" {
  962. if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
  963. return nil, errUnrecognized
  964. }
  965. }
  966. return mapping, nil
  967. }
  // memoryMapSentinels lists the marker strings that announce the memory
  // map section of a legacy profile.
  var memoryMapSentinels = []string{
  	"--- Memory map: ---",
  	"MAPPED_LIBRARIES:",
  }

  // isMemoryMapSentinel reports whether line contains, anywhere within it,
  // one of the known sentinels for memory map information. The match is
  // case-sensitive.
  func isMemoryMapSentinel(line string) bool {
  	for i := range memoryMapSentinels {
  		if strings.Contains(line, memoryMapSentinels[i]) {
  			return true
  		}
  	}
  	return false
  }
  982. func (p *Profile) addLegacyFrameInfo() {
  983. switch {
  984. case isProfileType(p, heapzSampleTypes):
  985. p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
  986. case isProfileType(p, contentionzSampleTypes):
  987. p.DropFrames, p.KeepFrames = lockRxStr, ""
  988. default:
  989. p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
  990. }
  991. }
  // Sample-type signatures used by addLegacyFrameInfo to classify a
  // profile. Each inner slice is one accepted sequence of sample type
  // names, in order.
  var (
  	// heapzSampleTypes are the signatures that identify a heap profile.
  	heapzSampleTypes = [][]string{
  		{"allocations", "size"}, // early Go pprof profiles
  		{"objects", "space"},
  		{"inuse_objects", "inuse_space"},
  		{"alloc_objects", "alloc_space"},
  	}

  	// contentionzSampleTypes are the signatures that identify a
  	// contention profile.
  	contentionzSampleTypes = [][]string{
  		{"contentions", "delay"},
  	}
  )
  1001. func isProfileType(p *Profile, types [][]string) bool {
  1002. st := p.SampleType
  1003. nextType:
  1004. for _, t := range types {
  1005. if len(st) != len(t) {
  1006. continue
  1007. }
  1008. for i := range st {
  1009. if st[i].Type != t[i] {
  1010. continue nextType
  1011. }
  1012. }
  1013. return true
  1014. }
  1015. return false
  1016. }
  // allocRxStr is a "|"-separated alternation of regular-expression
  // fragments naming memory-allocation routines. addLegacyFrameInfo
  // installs it as the DropFrames expression of heap profiles.
  var allocRxStr = strings.Join([]string{
  	// POSIX entry points.
  	`calloc`,
  	`cfree`,
  	`malloc`,
  	`free`,
  	`memalign`,
  	`do_memalign`,
  	`(__)?posix_memalign`,
  	`pvalloc`,
  	`valloc`,
  	`realloc`,

  	// TC malloc.
  	`tcmalloc::.*`,
  	`tc_calloc`,
  	`tc_cfree`,
  	`tc_malloc`,
  	`tc_free`,
  	`tc_memalign`,
  	`tc_posix_memalign`,
  	`tc_pvalloc`,
  	`tc_valloc`,
  	`tc_realloc`,
  	`tc_new`,
  	`tc_delete`,
  	`tc_newarray`,
  	`tc_deletearray`,
  	`tc_new_nothrow`,
  	`tc_newarray_nothrow`,

  	// Memory-allocation routines on OS X.
  	`malloc_zone_malloc`,
  	`malloc_zone_calloc`,
  	`malloc_zone_valloc`,
  	`malloc_zone_realloc`,
  	`malloc_zone_memalign`,
  	`malloc_zone_free`,

  	// Go runtime: every function in package runtime.
  	`runtime\..*`,

  	// Other misc. memory allocation routines
  	`BaseArena::.*`,
  	`(::)?do_malloc_no_errno`,
  	`(::)?do_malloc_pages`,
  	`(::)?do_malloc`,
  	`DoSampledAllocation`,
  	`MallocedMemBlock::MallocedMemBlock`,
  	`_M_allocate`,
  	`__builtin_(vec_)?delete`,
  	`__builtin_(vec_)?new`,
  	`__gnu_cxx::new_allocator::allocate`,
  	`__libc_malloc`,
  	`__malloc_alloc_template::allocate`,
  	`allocate`,
  	`cpp_alloc`,
  	`operator new(\[\])?`,
  	`simple_alloc::allocate`,
  }, `|`)
  // allocSkipRxStr is a "|"-separated alternation of regular-expression
  // fragments. addLegacyFrameInfo installs it as the KeepFrames expression
  // of heap profiles, alongside allocRxStr as DropFrames.
  var allocSkipRxStr = strings.Join([]string{
  	// Preserve Go runtime frames that appear in the middle/bottom of
  	// the stack.
  	`runtime\.panic`,
  	`runtime\.reflectcall`,
  	`runtime\.call[0-9]*`,
  }, `|`)
  // cpuProfilerRxStr is a "|"-separated alternation of function names.
  // addLegacyFrameInfo installs it as the DropFrames expression of every
  // profile that is neither a heap nor a contention profile.
  var cpuProfilerRxStr = strings.Join([]string{
  	`ProfileData::Add`,
  	`ProfileData::prof_handler`,
  	`CpuProfiler::prof_handler`,
  	`__pthread_sighandler`,
  	`__restore`,
  }, `|`)
  // lockRxStr is a "|"-separated alternation of regular-expression
  // fragments naming lock and mutex routines. addLegacyFrameInfo installs
  // it as the DropFrames expression of contention profiles.
  //
  // NOTE(review): some alternatives overlap; for example the first entry
  // matches a subset of what the second one matches. The list is kept
  // exactly as is.
  var lockRxStr = strings.Join([]string{
  	`RecordLockProfileData`,
  	`(base::)?RecordLockProfileData.*`,
  	`(base::)?SubmitMutexProfileData.*`,
  	`(base::)?SubmitSpinLockProfileData.*`,
  	`(base::Mutex::)?AwaitCommon.*`,
  	`(base::Mutex::)?Unlock.*`,
  	`(base::Mutex::)?UnlockSlow.*`,
  	`(base::Mutex::)?ReaderUnlock.*`,
  	`(base::MutexLock::)?~MutexLock.*`,
  	`(Mutex::)?AwaitCommon.*`,
  	`(Mutex::)?Unlock.*`,
  	`(Mutex::)?UnlockSlow.*`,
  	`(Mutex::)?ReaderUnlock.*`,
  	`(MutexLock::)?~MutexLock.*`,
  	`(SpinLock::)?Unlock.*`,
  	`(SpinLock::)?SlowUnlock.*`,
  	`(SpinLockHolder::)?~SpinLockHolder.*`,
  }, `|`)