diff --git a/src/internal/profilerecord/profilerecord.go b/src/internal/profilerecord/profilerecord.go index a5efdced8f77c9..f6f7e21c3d8455 100644 --- a/src/internal/profilerecord/profilerecord.go +++ b/src/internal/profilerecord/profilerecord.go @@ -8,10 +8,16 @@ // TODO: Consider moving this to internal/runtime, see golang.org/issue/65355. package profilerecord +import "unsafe" + type StackRecord struct { Stack []uintptr } +func (r StackRecord) GetStack() []uintptr { return r.Stack } +func (r StackRecord) GetLabels() unsafe.Pointer { return nil } +func (r StackRecord) GetGoroutine() GoroutineRecord { return GoroutineRecord{} } + type MemProfileRecord struct { AllocBytes, FreeBytes int64 AllocObjects, FreeObjects int64 @@ -26,3 +32,18 @@ type BlockProfileRecord struct { Cycles int64 Stack []uintptr } + +type GoroutineRecord struct { + ID uint64 + State uint32 + WaitReason uint8 + CreatorID uint64 + CreationPC uintptr + WaitSince int64 // approx time when the g became blocked, in nanoseconds + Labels unsafe.Pointer + Stack []uintptr +} + +func (r GoroutineRecord) GetStack() []uintptr { return r.Stack } +func (r GoroutineRecord) GetLabels() unsafe.Pointer { return r.Labels } +func (r GoroutineRecord) GetGoroutine() GoroutineRecord { return r } diff --git a/src/net/http/pprof/pprof.go b/src/net/http/pprof/pprof.go index 71aade67d32046..91a59a65c7352e 100644 --- a/src/net/http/pprof/pprof.go +++ b/src/net/http/pprof/pprof.go @@ -264,7 +264,7 @@ func (name handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { runtime.GC() } debug, _ := strconv.Atoi(r.FormValue("debug")) - if debug != 0 { + if debug != 0 && !(name == "goroutine" && debug == 3) { w.Header().Set("Content-Type", "text/plain; charset=utf-8") } else { w.Header().Set("Content-Type", "application/octet-stream") diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index 19e90fbb337e55..f001c3e916e637 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -1245,40 +1245,21 @@ func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool) }) } -//go:linkname pprof_goroutineProfileWithLabels -func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - return goroutineProfileWithLabels(p, labels) +//go:linkname pprof_goroutineProfile +func pprof_goroutineProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) { + return goroutineProfileInternal(p) } -// labels may be nil. If labels is non-nil, it must have the same length as p. -func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - if labels != nil && len(labels) != len(p) { - labels = nil - } - - return goroutineProfileWithLabelsConcurrent(p, labels) -} - -//go:linkname pprof_goroutineLeakProfileWithLabels -func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - return goroutineLeakProfileWithLabelsConcurrent(p, labels) -} - -// labels may be nil. If labels is non-nil, it must have the same length as p. -func goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { - if labels != nil && len(labels) != len(p) { - labels = nil - } - - return goroutineLeakProfileWithLabelsConcurrent(p, labels) +//go:linkname pprof_goroutineLeakProfile +func pprof_goroutineLeakProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) { + return goroutineLeakProfileInternal(p) } var goroutineProfile = struct { sema uint32 active bool offset atomic.Int64 - records []profilerecord.StackRecord - labels []unsafe.Pointer + records []profilerecord.GoroutineRecord }{ sema: 1, } @@ -1316,18 +1297,18 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new)) } -func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineLeakProfileInternal(p []profilerecord.GoroutineRecord) (n int, ok bool) { if len(p) == 0 { // An empty slice is obviously too small. Return a rough // allocation estimate. return work.goroutineLeak.count, false } - // Use the same semaphore as goroutineProfileWithLabelsConcurrent, + // Use the same semaphore as goroutineProfileInternal, // because ultimately we still use goroutine profiles. semacquire(&goroutineProfile.sema) - // Unlike in goroutineProfileWithLabelsConcurrent, we don't need to + // Unlike in goroutineProfileInternal, we don't need to // save the current goroutine stack, because it is obviously not leaked. pcbuf := makeProfStack() // see saveg() for explanation @@ -1358,7 +1339,7 @@ func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, lab return n, true } -func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineProfileInternal(p []profilerecord.GoroutineRecord) (n int, ok bool) { if len(p) == 0 { // An empty slice is obviously too small. Return a rough // allocation estimate without bothering to STW. As long as @@ -1401,9 +1382,15 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels systemstack(func() { saveg(pc, sp, ourg, &p[0], pcbuf) }) - if labels != nil { - labels[0] = ourg.labels - } + + p[0].ID = ourg.goid + p[0].CreatorID = ourg.parentGoid + p[0].CreationPC = ourg.gopc + p[0].Labels = ourg.labels + p[0].State = readgstatus(ourg) &^ _Gscan + p[0].WaitReason = uint8(ourg.waitreason) + p[0].WaitSince = ourg.waitsince + ourg.goroutineProfiled.Store(goroutineProfileSatisfied) goroutineProfile.offset.Store(1) @@ -1414,7 +1401,6 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels // field set to goroutineProfileSatisfied. goroutineProfile.active = true goroutineProfile.records = p - goroutineProfile.labels = labels startTheWorld(stw) // Visit each goroutine that existed as of the startTheWorld call above. @@ -1436,7 +1422,6 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels endOffset := goroutineProfile.offset.Swap(0) goroutineProfile.active = false goroutineProfile.records = nil - goroutineProfile.labels = nil startTheWorld(stw) // Restore the invariant that every goroutine struct in allgs has its @@ -1528,7 +1513,7 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) { // System goroutines should not appear in the profile. // Check this here and not in tryRecordGoroutineProfile because isSystemGoroutine // may change on a goroutine while it is executing, so while the scheduler might - // see a system goroutine, goroutineProfileWithLabelsConcurrent might not, and + // see a system goroutine, goroutineProfileInternal might not, and // this inconsistency could cause invariants to be violated, such as trying to // record the stack of a running goroutine below. In short, we still want system // goroutines to participate in the same state machine on gp1.goroutineProfiled as @@ -1545,7 +1530,7 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) { if offset >= len(goroutineProfile.records) { // Should be impossible, but better to return a truncated profile than // to crash the entire process at this point. Instead, deal with it in - // goroutineProfileWithLabelsConcurrent where we have more context. + // goroutineProfileInternal where we have more context. return } @@ -1559,12 +1544,16 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) { // to avoid schedule delays. systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset], pcbuf) }) - if goroutineProfile.labels != nil { - goroutineProfile.labels[offset] = gp1.labels - } + goroutineProfile.records[offset].Labels = gp1.labels + goroutineProfile.records[offset].ID = gp1.goid + goroutineProfile.records[offset].CreatorID = gp1.parentGoid + goroutineProfile.records[offset].CreationPC = gp1.gopc + goroutineProfile.records[offset].State = readgstatus(gp1) &^ _Gscan + goroutineProfile.records[offset].WaitReason = uint8(gp1.waitreason) + goroutineProfile.records[offset].WaitSince = gp1.waitsince } -func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { +func goroutineProfileWithLabelsSync(p []profilerecord.GoroutineRecord, labels []unsafe.Pointer) (n int, ok bool) { gp := getg() isOK := func(gp1 *g) bool { @@ -1650,7 +1639,7 @@ func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsa // Most clients should use the [runtime/pprof] package instead // of calling GoroutineProfile directly. func GoroutineProfile(p []StackRecord) (n int, ok bool) { - records := make([]profilerecord.StackRecord, len(p)) + records := make([]profilerecord.GoroutineRecord, len(p)) n, ok = goroutineProfileInternal(records) if !ok { return @@ -1662,11 +1651,7 @@ func GoroutineProfile(p []StackRecord) (n int, ok bool) { return } -func goroutineProfileInternal(p []profilerecord.StackRecord) (n int, ok bool) { - return goroutineProfileWithLabels(p, nil) -} - -func saveg(pc, sp uintptr, gp *g, r *profilerecord.StackRecord, pcbuf []uintptr) { +func saveg(pc, sp uintptr, gp *g, r *profilerecord.GoroutineRecord, pcbuf []uintptr) { // To reduce memory usage, we want to allocate a r.Stack that is just big // enough to hold gp's stack trace. Naively we might achieve this by // recording our stack trace into mp.profStack, and then allocating a diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index b524e992b8b209..775620c96157f6 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -375,7 +375,9 @@ func (p *Profile) Remove(value any) { // The predefined profiles may assign meaning to other debug values; // for example, when printing the "goroutine" profile, debug=2 means to // print the goroutine stacks in the same form that a Go program uses -// when dying due to an unrecovered panic. +// when dying due to an unrecovered panic, and debug=3 means to output a +// binary protobuf profile identical to debug=0, but with per-goroutine +// details such as ID and status included as sample labels. func (p *Profile) WriteTo(w io.Writer, debug int) error { if p.name == "" { panic("pprof: use of zero Profile") @@ -402,7 +404,9 @@ type stackProfile [][]uintptr func (x stackProfile) Len() int { return len(x) } func (x stackProfile) Stack(i int) []uintptr { return x[i] } -func (x stackProfile) Label(i int) *labelMap { return nil } +func (x stackProfile) Goroutine(i int) profilerecord.GoroutineRecord { + return profilerecord.GoroutineRecord{} +} // A countProfile is a set of stack traces to be printed as counts // grouped by stack trace. There are multiple implementations: @@ -411,7 +415,7 @@ func (x stackProfile) Label(i int) *labelMap { return nil } type countProfile interface { Len() int Stack(i int) []uintptr - Label(i int) *labelMap + Goroutine(i int) profilerecord.GoroutineRecord } // expandInlinedFrames copies the call stack from pcs into dst, expanding any @@ -464,7 +468,50 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, records [] // printCountProfile prints a countProfile at the specified debug level. // The profile will be in compressed proto format unless debug is nonzero. +// Non-zero debug values have the following behaviors: +// - debug=1: text format, aggregated by unique stack, state and labels. +// - debug=2: handled elsewhere; the output of runtime.Stack(). +// - debug=3: binary proto format-identical to debug=0 with per-goroutine labels. func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { + // Debug >1 is non-aggregated, so branch immediately to avoid computing the + // aggregated map if we do not need it. This does duplicate a little of the + // logic for actually constructing and outputting the proto profile but only + // the helper calls so it is minimal and avoids a more complex structuring of + // one output path that handles both the aggregated and non-aggregated cases. + if debug == 3 { + // Output profile in protobuf form. + b := newProfileBuilder(w) + b.pbValueType(tagProfile_PeriodType, name, "count") + b.pb.int64Opt(tagProfile_Period, 1) + b.pbValueType(tagProfile_SampleType, name, "count") + + values := []int64{1} + var locs []uint64 + for i := 0; i < p.Len(); i++ { + locs = b.appendLocsForStack(locs[:0], p.Stack(i)) + g := p.Goroutine(i) + extra := func() { + if g.ID != 0 { + b.pbLabelNum(tagSample_Label, "go::goroutine", int64(g.ID)) + if g.ID != 1 { + b.pbLabelNum(tagSample_Label, "go::goroutine_created_by", int64(g.CreatorID)) + } + b.pbLabel(tagSample_Label, "go::goroutine_state", pprof_gStatusString(g.State, g.WaitReason), 0) + if mins := pprof_gWaitFor(g.State, g.WaitSince); mins > 0 { + b.pbLabelNum(tagSample_Label, "go::goroutine_wait_minutes", int64(mins)) + } + } + if g.Labels != nil { + for _, lbl := range (*labelMap)(g.Labels).list { + b.pbLabel(tagSample_Label, lbl.key, lbl.value, 0) + } + } + } + b.pbSample(values, locs, extra) + } + return b.build() + } + // Build count of each stack. var buf strings.Builder key := func(stk []uintptr, lbls *labelMap) string { @@ -484,7 +531,7 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro var keys []string n := p.Len() for i := 0; i < n; i++ { - k := key(p.Stack(i), p.Label(i)) + k := key(p.Stack(i), (*labelMap)(p.Goroutine(i).Labels)) if count[k] == 0 { index[k] = i keys = append(keys, k) @@ -520,9 +567,9 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro locs = b.appendLocsForStack(locs[:0], p.Stack(index[k])) idx := index[k] var labels func() - if p.Label(idx) != nil { + if l := p.Goroutine(idx).Labels; l != nil { labels = func() { - for _, lbl := range p.Label(idx).list { + for _, lbl := range (*labelMap)(l).list { b.pbLabel(tagSample_Label, lbl.key, lbl.value, 0) } } @@ -742,7 +789,7 @@ func writeThreadCreate(w io.Writer, debug int) error { // Until https://golang.org/issues/6104 is addressed, wrap // ThreadCreateProfile because there's no point in tracking labels when we // don't get any stack-traces. - return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord, _ []unsafe.Pointer) (n int, ok bool) { + return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord) (n int, ok bool) { return pprof_threadCreateInternal(p) }) } @@ -754,10 +801,10 @@ func countGoroutine() int { // writeGoroutine writes the current runtime GoroutineProfile to w. func writeGoroutine(w io.Writer, debug int) error { - if debug >= 2 { + if debug == 2 { return writeGoroutineStacks(w) } - return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels) + return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfile) } // writeGoroutineLeak first invokes a GC cycle that performs goroutine leak detection. @@ -774,7 +821,7 @@ func writeGoroutineLeak(w io.Writer, debug int) error { } // Otherwise, write the goroutine leak profile. - return writeRuntimeProfile(w, debug, "goroutineleak", pprof_goroutineLeakProfileWithLabels) + return writeRuntimeProfile(w, debug, "goroutineleak", pprof_goroutineLeakProfile) } func writeGoroutineStacks(w io.Writer) error { @@ -798,24 +845,28 @@ func writeGoroutineStacks(w io.Writer) error { return err } -func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error { +type capturedStack interface { + GetStack() []uintptr + GetLabels() unsafe.Pointer + GetGoroutine() profilerecord.GoroutineRecord +} + +func writeRuntimeProfile[T capturedStack](w io.Writer, debug int, name string, fetch func([]T) (int, bool)) error { // Find out how many records there are (fetch(nil)), // allocate that many records, and get the data. // There's a race—more records might be added between // the two calls—so allocate a few extra records for safety // and also try again if we're very unlucky. // The loop should only execute one iteration in the common case. - var p []profilerecord.StackRecord - var labels []unsafe.Pointer - n, ok := fetch(nil, nil) + var p []T + n, ok := fetch(nil) for { // Allocate room for a slightly bigger profile, // in case a few more entries have been added // since the call to ThreadProfile. - p = make([]profilerecord.StackRecord, n+10) - labels = make([]unsafe.Pointer, n+10) - n, ok = fetch(p, labels) + p = make([]T, n+10) + n, ok = fetch(p) if ok { p = p[0:n] break @@ -823,17 +874,14 @@ func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profi // Profile grew; try again. } - return printCountProfile(w, debug, name, &runtimeProfile{p, labels}) + return printCountProfile(w, debug, name, runtimeProfile[T](p)) } -type runtimeProfile struct { - stk []profilerecord.StackRecord - labels []unsafe.Pointer -} +type runtimeProfile[T capturedStack] []T -func (p *runtimeProfile) Len() int { return len(p.stk) } -func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack } -func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) } +func (p runtimeProfile[T]) Len() int { return len(p) } +func (p runtimeProfile[T]) Stack(i int) []uintptr { return p[i].GetStack() } +func (p runtimeProfile[T]) Goroutine(i int) profilerecord.GoroutineRecord { return p[i].GetGoroutine() } var cpu struct { sync.Mutex @@ -996,11 +1044,11 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu return b.Flush() } -//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels -func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) +//go:linkname pprof_goroutineProfile runtime.pprof_goroutineProfile +func pprof_goroutineProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) -//go:linkname pprof_goroutineLeakProfileWithLabels runtime.pprof_goroutineLeakProfileWithLabels -func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) +//go:linkname pprof_goroutineLeakProfile runtime.pprof_goroutineLeakProfile +func pprof_goroutineLeakProfile(p []profilerecord.GoroutineRecord) (n int, ok bool) //go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond func pprof_cyclesPerSecond() int64 @@ -1022,3 +1070,9 @@ func pprof_fpunwindExpand(dst, src []uintptr) int //go:linkname pprof_makeProfStack runtime.pprof_makeProfStack func pprof_makeProfStack() []uintptr + +//go:linkname pprof_gStatusString runtime.pprof_gStatusString +func pprof_gStatusString(state uint32, reason uint8) string + +//go:linkname pprof_gWaitFor runtime.pprof_gWaitFor +func pprof_gWaitFor(gpstatus uint32, waitsince int64) int64 diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go index b816833e52285f..69daad207b5985 100644 --- a/src/runtime/pprof/pprof_test.go +++ b/src/runtime/pprof/pprof_test.go @@ -1571,7 +1571,7 @@ func TestGoroutineProfileConcurrency(t *testing.T) { goroutineProf := Lookup("goroutine") profilerCalls := func(s string) int { - return strings.Count(s, "\truntime/pprof.runtime_goroutineProfileWithLabels+") + return strings.Count(s, "\truntime/pprof.runtime_goroutineProfile+") } includesFinalizerOrCleanup := func(s string) bool { @@ -1931,6 +1931,195 @@ func BenchmarkGoroutine(b *testing.B) { } } +func TestGoroutineProfileDebug3Creators(t *testing.T) { + // Create synthetic goroutines using the helper + cleanup := createSyntheticGoroutines(10, 2) + defer cleanup() + + p := Lookup("goroutine") + + // Establish a created-by mapping by parsing a debug=2 profile. + createdBy := make(map[string]string) + var buf bytes.Buffer + err := p.WriteTo(&buf, 2) + if err != nil { + t.Fatal(err) + } + for _, goroutine := range strings.Split(buf.String(), "\n\n") { + parts := strings.SplitN(goroutine, " ", 3) + for _, l := range strings.Split(parts[2], "\n") { + if c := strings.Split(l, "in goroutine "); len(c) > 1 { + createdBy[parts[1]] = c[1] + } + } + } + + // Verify the debug=3 format of said profile has matching ID and creator labels. + buf.Reset() + err = p.WriteTo(&buf, 3) + if err != nil { + t.Fatal(err) + } + parsed, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + if e, g := len(createdBy), len(parsed.Sample); e > g { + t.Fatalf("expected %d goroutines, got %d", e, g) + } + for _, s := range parsed.Sample { + id := strconv.Itoa(int(s.NumLabel["go::goroutine"][0])) + got := s.NumLabel["go::goroutine_created_by"] + if createdBy[id] == "" { + if len(got) != 0 { + t.Fatalf("goroutine %s: got created_by %q, want none", id, got) + } + } else { + if e := createdBy[id]; len(got) != 1 || strconv.Itoa(int(got[0])) != e { + t.Fatalf("goroutine %s: got created_by %q, want %q", id, got, e) + } + } + } +} + +// BenchmarkGoroutineProfileLatencyImpact measures the latency impact on *other* +// running goroutines during goroutine profile collection, when variable numbers +// of other goroutines are profiled. This focus on the observed latency of in +// other goroutines is what differentiates this benchmark from other benchmarks +// of goroutine profiling, such as BenchmarkGoroutine, that measure the +// the performance of profiling itself rather than its impact on the performance +// of concurrently executing goroutines. +func BenchmarkGoroutineProfileLatencyImpact(b *testing.B) { + for _, numGoroutines := range []int{100, 1000, 10000} { + for _, withDepth := range []int{3, 10, 50} { + b.Run(fmt.Sprintf("goroutines=%dx%d", numGoroutines, withDepth), func(b *testing.B) { + // Setup synthetic blocked goroutines using helper + cleanup := createSyntheticGoroutines(numGoroutines, withDepth) + defer cleanup() + for _, debug := range []int{-1, 1, 2, 3} { + name := fmt.Sprintf("debug=%d", debug) + if debug < 0 { + name = "debug=none" + } + b.Run(name, func(b *testing.B) { + // Launch latency probe goroutines. + const numProbes = 3 + probeStop := make(chan struct{}) + var probeWg, probesReady sync.WaitGroup + probes := make([]time.Duration, numProbes) + for prober := 0; prober < numProbes; prober++ { + probeWg.Add(1) + probesReady.Add(1) + go func(idx int) { + defer probeWg.Done() + probesReady.Done() + for { + select { + case <-probeStop: + return + default: + last := time.Now() + for i := 0; i < 20; i++ { + latency := time.Since(last) + last = time.Now() + if probes[idx] < latency { + probes[idx] = latency + } + } + } + } + }(prober) + } + probesReady.Wait() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if debug >= 0 { + time.Sleep(3 * time.Millisecond) + var buf bytes.Buffer + err := Lookup("goroutine").WriteTo(&buf, debug) + if err != nil { + b.Fatalf("Profile failed: %v", err) + } + time.Sleep(3 * time.Millisecond) + } else { + // Just observe the probes without a profile interrupting them. + time.Sleep(6 * time.Millisecond) + } + } + b.StopTimer() + close(probeStop) + probeWg.Wait() + + var probeMax time.Duration + for _, probe := range probes { + if probe > probeMax { + probeMax = probe + } + } + b.ReportMetric(float64(probeMax), "max_latency_ns") + }) + } + }) + } + } +} + +// createSyntheticGoroutines creates a requested number of goroutines to provide +// testdata against which various goroutine profiling mechanisms such as +// pprof.Lookup("goroutine").WriteTo(debug=1/2) can be tested or benchmarked. +// After the requested goroutines are started it returns a cleanup function to +// stop them. The created goroutines vary their stacks across several sythnetic +// functions, with a minimum stack depth controlled by minDepth. +func createSyntheticGoroutines(numGoroutines, minDepth int) func() { + var syntheticFn0, syntheticFn1, syntheticFn2 func(<-chan struct{}, int) + + syntheticFn0 = func(ch <-chan struct{}, d int) { + if d > 0 { + syntheticFn1(ch, d-1) + } + <-ch + } + syntheticFn1 = func(ch <-chan struct{}, d int) { + if d > 0 { + syntheticFn2(ch, d-1) + } + <-ch + } + syntheticFn2 = func(ch <-chan struct{}, d int) { + if d > 0 { + syntheticFn0(ch, d-1) + } + <-ch + } + + var wg, setup sync.WaitGroup + setup.Add(numGoroutines) + blockCh := make(chan struct{}) + + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + setup.Done() + depth := minDepth + (id % 7) + switch id % 4 { + case 0: + syntheticFn0(blockCh, depth) + case 1: + syntheticFn1(blockCh, depth) + case 2: + syntheticFn2(blockCh, depth) + } + }(i) + } + setup.Wait() + + return func() { + close(blockCh) + wg.Wait() + } +} + var emptyCallStackTestRun int64 // Issue 18836. diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go index 28ceb81542110a..1a38de97388d25 100644 --- a/src/runtime/pprof/proto.go +++ b/src/runtime/pprof/proto.go @@ -177,6 +177,14 @@ func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) { b.pb.endMessage(tag, start) } +// pbLabel encodes a Label message to b.pb, with only a num component. +func (b *profileBuilder) pbLabelNum(tag int, key string, num int64) { + start := b.pb.startMessage() + b.pb.int64Opt(tagLabel_Key, b.stringIndex(key)) + b.pb.int64Opt(tagLabel_Num, num) + b.pb.endMessage(tag, start) +} + // pbLine encodes a Line message to b.pb. func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) { start := b.pb.startMessage() diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 36949fb2cf0681..0788fcb7389d4a 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4803,7 +4803,6 @@ func exitsyscall() { throw("exitsyscall: syscall frame is no longer valid") } - gp.waitsince = 0 oldp := gp.m.oldp.ptr() gp.m.oldp = 0 if exitsyscallfast(oldp) { @@ -4817,6 +4816,7 @@ func exitsyscall() { tryRecordGoroutineProfileWB(gp) }) } + gp.waitsince = 0 trace := traceAcquire() if trace.ok() { lostP := oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick @@ -4878,6 +4878,10 @@ func exitsyscall() { gp.syscallsp = 0 gp.m.p.ptr().syscalltick++ gp.throwsplit = false + + // NB: we do not reset gp.waitsince in this branch as we have not yet checked + // if we need to collect a profile before resuming; we leave resetting it to + // execute(). } //go:nosplit diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index de62762ba76906..3aad8bf31f6bbd 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1219,24 +1219,8 @@ func goroutineheader(gp *g) { isScan := gpstatus&_Gscan != 0 gpstatus &^= _Gscan // drop the scan bit - // Basic string status - var status string - if 0 <= gpstatus && gpstatus < uint32(len(gStatusStrings)) { - status = gStatusStrings[gpstatus] - } else { - status = "???" - } - - // Override. - if (gpstatus == _Gwaiting || gpstatus == _Gleaked) && gp.waitreason != waitReasonZero { - status = gp.waitreason.String() - } - // approx time the G is blocked, in minutes - var waitfor int64 - if (gpstatus == _Gwaiting || gpstatus == _Gsyscall) && gp.waitsince != 0 { - waitfor = (nanotime() - gp.waitsince) / 60e9 - } + print("goroutine ", gp.goid) if gp.m != nil && gp.m.throwing >= throwTypeRuntime && gp == gp.m.curg || level >= 2 { print(" gp=", gp) @@ -1246,6 +1230,7 @@ func goroutineheader(gp *g) { print(" m=nil") } } + status := gStatusString(gpstatus, gp.waitreason) print(" [", status) if gpstatus == _Gleaked { print(" (leaked)") @@ -1261,7 +1246,7 @@ func goroutineheader(gp *g) { // suffix to distinguish it from the non-durable form, add it here. print(" (durable)") } - if waitfor >= 1 { + if waitfor := gWaitFor(gpstatus, gp.waitsince); waitfor >= 1 { print(", ", waitfor, " minutes") } if gp.lockedm != 0 { @@ -1273,6 +1258,38 @@ func goroutineheader(gp *g) { print("]:\n") } +//go:linkname pprof_gWaitFor +func pprof_gWaitFor(gpstatus uint32, waitsince int64) int64 { + return gWaitFor(gpstatus, waitsince) +} + +//go:linkname pprof_gStatusString +func pprof_gStatusString(gpstatus uint32, reason waitReason) string { + return gStatusString(gpstatus, reason) +} + +// gStatusString returns a string representing the status of a goroutine. +func gStatusString(gpstatus uint32, reason waitReason) string { + var status string + if gpstatus < uint32(len(gStatusStrings)) { + status = gStatusStrings[gpstatus] + } else { + status = "???" + } + if (gpstatus == _Gwaiting || gpstatus == _Gleaked) && reason != waitReasonZero { + status = reason.String() + } + return status +} + +// gWaitFor returns the number of minutes that the goroutine has been waiting. +func gWaitFor(gpstatus uint32, waitsince int64) int64 { + if (gpstatus == _Gwaiting || gpstatus == _Gsyscall) && waitsince != 0 { + return (nanotime() - waitsince) / 60e9 + } + return 0 +} + func tracebackothers(me *g) { tracebacksomeothers(me, func(*g) bool { return true }) }