From 60260c35ba5e3a51e5f340e9ceb551c741979479 Mon Sep 17 00:00:00 2001 From: Kalyana Chadalavada Date: Thu, 10 Sep 2020 02:12:45 -0700 Subject: [PATCH 1/5] fix(profiler): do not collect disabled profile types Fixes #2835 --- profiler/profiler.go | 13 +++++++++++++ profiler/profiler_test.go | 17 +++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/profiler/profiler.go b/profiler/profiler.go index 6e3e176752b..fdcfc2dd60f 100644 --- a/profiler/profiler.go +++ b/profiler/profiler.go @@ -353,6 +353,19 @@ func (a *agent) profileAndUpload(ctx context.Context, p *pb.Profile) { var prof bytes.Buffer pt := p.GetProfileType() + ptEnabled := false + for _, enabled := range a.profileTypes { + if enabled == pt { + ptEnabled = true + break + } + } + + if !ptEnabled { + debugLog("skipping collection of disabled profile type: %v", pt) + return + } + switch pt { case pb.ProfileType_CPU: duration, err := ptypes.Duration(p.Duration) diff --git a/profiler/profiler_test.go b/profiler/profiler_test.go index eeb709cbbb4..097640b0cc1 100644 --- a/profiler/profiler_test.go +++ b/profiler/profiler_test.go @@ -76,7 +76,7 @@ func createTestAgent(psc pb.ProfilerServiceClient) *agent { client: psc, deployment: createTestDeployment(), profileLabels: map[string]string{instanceLabel: testInstance}, - profileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_THREADS}, + profileTypes: []pb.ProfileType{pb.ProfileType_CPU, pb.ProfileType_HEAP, pb.ProfileType_HEAP_ALLOC, pb.ProfileType_THREADS}, } } @@ -140,11 +140,12 @@ func TestProfileAndUpload(t *testing.T) { errFunc := func(io.Writer) error { return errors.New("") } testDuration := time.Second * 5 tests := []struct { - profileType pb.ProfileType - duration *time.Duration - startCPUProfileFunc func(io.Writer) error - writeHeapProfileFunc func(io.Writer) error - wantBytes []byte + profileType pb.ProfileType + duration *time.Duration + startCPUProfileFunc func(io.Writer) error + writeHeapProfileFunc func(io.Writer) error + deltaMutexProfileFunc func(io.Writer) error + wantBytes []byte }{ { profileType: pb.ProfileType_CPU, @@ -218,6 +219,10 @@ func TestProfileAndUpload(t *testing.T) { return nil }, }, + { + profileType: pb.ProfileType_CONTENTION, + deltaMutexProfileFunc: errFunc, + }, } for _, tt := range tests { From d77d11a21ec2bd423453cd16540df3e0344d573b Mon Sep 17 00:00:00 2001 From: Kalyana Chadalavada Date: Fri, 11 Sep 2020 18:25:28 -0700 Subject: [PATCH 2/5] feat(profiler): Add OOM Profiling capabilty --- profiler/profiler.go | 119 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/profiler/profiler.go b/profiler/profiler.go index fdcfc2dd60f..54f2a98669d 100644 --- a/profiler/profiler.go +++ b/profiler/profiler.go @@ -35,6 +35,7 @@ package profiler import ( + "bufio" "bytes" "context" "errors" @@ -44,6 +45,7 @@ import ( "regexp" "runtime" "runtime/pprof" + "strconv" "sync" "time" @@ -78,6 +80,7 @@ var ( dialGRPC = gtransport.DialPool onGCE = gcemd.OnGCE serviceRegexp = regexp.MustCompile(`^[a-z]([-a-z0-9_.]{0,253}[a-z0-9])?$`) + mutexHeap sync.Mutex // For testing only. // When the profiling loop has exited without error and this channel is @@ -99,6 +102,9 @@ const ( maxBackoff = time.Hour backoffMultiplier = 1.3 // Backoff envelope increases by this factor on each retry. retryInfoMetadata = "google.rpc.retryinfo-bin" + memLimitFile = "/sys/fs/cgroup/memory/memory.limit_in_bytes" + memUsedFile = "/sys/fs/cgroup/memory/memory.usage_in_bytes" + eventLabel = "event" ) // Config is the profiler configuration. @@ -183,6 +189,19 @@ type Config struct { // the profile collection loop exits.When numProfiles is 0, profiles will // be collected for the duration of the program. For testing only. numProfiles int + + // When true, the agent collects a heap profile when memory usage hits a + // thresholds specified in OOMProfileThreshold. Used only when + // running in containers. + OOMProfile bool + + // OOMProfileThreshold sets thresholds for memory utilization, above which + // a heap profile is collected. Used only when running in containers. + OOMProfileThreshold float32 + + // OOMCheckInterval is the time interval between two consecutive memory + // utilization checks. + OOMCheckInterval time.Duration } // allowUntilSuccess is an object that will perform action till @@ -256,6 +275,7 @@ func start(cfg Config, options ...option.ClientOption) error { return err } go pollProfilerService(withXGoogHeader(ctx), a) + go pollMemoryUtilization(ctx, a) return nil } @@ -380,6 +400,8 @@ func (a *agent) profileAndUpload(ctx context.Context, p *pb.Profile) { sleep(ctx, duration) stopCPUProfile() case pb.ProfileType_HEAP: + mutexHeap.Lock() + defer mutexHeap.Unlock() if err := heapProfile(&prof); err != nil { debugLog("failed to write heap profile: %v", err) return @@ -594,6 +616,11 @@ func initializeConfig(cfg Config) error { if config.APIAddr == "" { config.APIAddr = apiAddress } + + if config.OOMProfileThreshold == 0 { + config.OOMProfileThreshold = 0.98 + } + return nil } @@ -612,3 +639,95 @@ func pollProfilerService(ctx context.Context, a *agent) { profilingDone <- true } } + +func readValueFromFile(ctx context.Context, file string) (int64, error) { + fh, err := os.Open(file) + if err != nil { + return 0, fmt.Errorf("failed to read file: %v", err) + } + defer fh.Close() + + fscanner := bufio.NewScanner(fh) + + var value int64 + for fscanner.Scan() { + value, err = strconv.ParseInt(fscanner.Text(), 10, 64) + if err != nil { + return 0, fmt.Errorf("failed to read number from file: %v", err) + } + break + } + return value, nil +} + +func calculateMemoryUtilization(ctx context.Context) (float32, error) { + memLimitBytes, err := readValueFromFile(ctx, memLimitFile) + if err != nil { + return 0, fmt.Errorf("failed to read value from file: %v", err) + } + + memUsedBytes, err := readValueFromFile(ctx, memUsedFile) + if err != nil { + return 0, fmt.Errorf("failed to read value from file: %v", err) + } + return float32(float64(memUsedBytes) / float64(memLimitBytes)), nil +} + +// pollMemoryUtilization periodically calculates memory utilization. When the +// utilization crosses OOMProfileThreshold, a heap profile is collected and +// uploaded. +func pollMemoryUtilization(ctx context.Context, a *agent) { + // Check HEAP profile is not disabled. + heapEnabled := false + for _, enabled := range a.profileTypes { + if enabled == pb.ProfileType_HEAP { + heapEnabled = true + break + } + } + if !heapEnabled { + debugLog("HEAP profile collection is disabled. OOM profile collection will be disabled") + return + } + + // Start ticker + tick := time.NewTicker(config.OOMCheckInterval) + var ( + utilization float32 + err error + prof bytes.Buffer + ) + for range tick.C { + utilization, err = calculateMemoryUtilization(ctx) + if err != nil { + debugLog("failed to calculate memory utilization: %v", err) + } + if utilization >= config.OOMProfileThreshold { + mutexHeap.Lock() + if err := heapProfile(&prof); err != nil { + debugLog("failed to write OOM heap profile: %v", err) + } + mutexHeap.Unlock() + req := pb.CreateOfflineProfileRequest{ + Parent: "projects/" + a.deployment.ProjectId, + Profile: &pb.Profile{ + ProfileType: pb.ProfileType_HEAP, + Deployment: &pb.Deployment{ + ProjectId: a.deployment.ProjectId, + Target: a.deployment.Target + "-event", + Labels: a.deployment.Labels, + }, + ProfileBytes: prof.Bytes(), + Labels: map[string]string{ + eventLabel: "oom", + }, + }, + } + p, err := a.client.CreateOfflineProfile(ctx, &req) + if err != nil { + debugLog("failed to upload event profile: %v", err) + } + debugLog("uploaded event profile: %s", p.Name) + } + } +} From 57460798306423cfa7351a76606427d6226c864d Mon Sep 17 00:00:00 2001 From: Kalyana Chadalavada Date: Fri, 11 Sep 2020 19:49:11 -0700 Subject: [PATCH 3/5] Add default value for OOMCheckInterval --- profiler/profiler.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/profiler/profiler.go b/profiler/profiler.go index 54f2a98669d..283dfde2e44 100644 --- a/profiler/profiler.go +++ b/profiler/profiler.go @@ -621,6 +621,10 @@ func initializeConfig(cfg Config) error { config.OOMProfileThreshold = 0.98 } + if config.OOMCheckInterval == 0 { + config.OOMCheckInterval = time.Duration(30) * time.Second + } + return nil } @@ -689,6 +693,7 @@ func pollMemoryUtilization(ctx context.Context, a *agent) { debugLog("HEAP profile collection is disabled. OOM profile collection will be disabled") return } + debugLog("OOM profiling enabled") // Start ticker tick := time.NewTicker(config.OOMCheckInterval) From ed3d121a1cb7945139fe64816f7708b1b0844e29 Mon Sep 17 00:00:00 2001 From: Kalyana Chadalavada Date: Fri, 11 Sep 2020 20:46:20 -0700 Subject: [PATCH 4/5] Revert "Add default value for OOMCheckInterval" This reverts commit 57460798306423cfa7351a76606427d6226c864d. --- profiler/profiler.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/profiler/profiler.go b/profiler/profiler.go index 283dfde2e44..54f2a98669d 100644 --- a/profiler/profiler.go +++ b/profiler/profiler.go @@ -621,10 +621,6 @@ func initializeConfig(cfg Config) error { config.OOMProfileThreshold = 0.98 } - if config.OOMCheckInterval == 0 { - config.OOMCheckInterval = time.Duration(30) * time.Second - } - return nil } @@ -693,7 +689,6 @@ func pollMemoryUtilization(ctx context.Context, a *agent) { debugLog("HEAP profile collection is disabled. OOM profile collection will be disabled") return } - debugLog("OOM profiling enabled") // Start ticker tick := time.NewTicker(config.OOMCheckInterval) From a0968dc29552cbfe0cf56f3309e30ece4015a37b Mon Sep 17 00:00:00 2001 From: Kalyana Chadalavada Date: Fri, 11 Sep 2020 20:46:36 -0700 Subject: [PATCH 5/5] Revert "feat(profiler): Add OOM Profiling capabilty" This reverts commit d77d11a21ec2bd423453cd16540df3e0344d573b. --- profiler/profiler.go | 119 ------------------------------------------- 1 file changed, 119 deletions(-) diff --git a/profiler/profiler.go b/profiler/profiler.go index 54f2a98669d..fdcfc2dd60f 100644 --- a/profiler/profiler.go +++ b/profiler/profiler.go @@ -35,7 +35,6 @@ package profiler import ( - "bufio" "bytes" "context" "errors" @@ -45,7 +44,6 @@ import ( "regexp" "runtime" "runtime/pprof" - "strconv" "sync" "time" @@ -80,7 +78,6 @@ var ( dialGRPC = gtransport.DialPool onGCE = gcemd.OnGCE serviceRegexp = regexp.MustCompile(`^[a-z]([-a-z0-9_.]{0,253}[a-z0-9])?$`) - mutexHeap sync.Mutex // For testing only. // When the profiling loop has exited without error and this channel is @@ -102,9 +99,6 @@ const ( maxBackoff = time.Hour backoffMultiplier = 1.3 // Backoff envelope increases by this factor on each retry. retryInfoMetadata = "google.rpc.retryinfo-bin" - memLimitFile = "/sys/fs/cgroup/memory/memory.limit_in_bytes" - memUsedFile = "/sys/fs/cgroup/memory/memory.usage_in_bytes" - eventLabel = "event" ) // Config is the profiler configuration. @@ -189,19 +183,6 @@ type Config struct { // the profile collection loop exits.When numProfiles is 0, profiles will // be collected for the duration of the program. For testing only. numProfiles int - - // When true, the agent collects a heap profile when memory usage hits a - // thresholds specified in OOMProfileThreshold. Used only when - // running in containers. - OOMProfile bool - - // OOMProfileThreshold sets thresholds for memory utilization, above which - // a heap profile is collected. Used only when running in containers. - OOMProfileThreshold float32 - - // OOMCheckInterval is the time interval between two consecutive memory - // utilization checks. - OOMCheckInterval time.Duration } // allowUntilSuccess is an object that will perform action till @@ -275,7 +256,6 @@ func start(cfg Config, options ...option.ClientOption) error { return err } go pollProfilerService(withXGoogHeader(ctx), a) - go pollMemoryUtilization(ctx, a) return nil } @@ -400,8 +380,6 @@ func (a *agent) profileAndUpload(ctx context.Context, p *pb.Profile) { sleep(ctx, duration) stopCPUProfile() case pb.ProfileType_HEAP: - mutexHeap.Lock() - defer mutexHeap.Unlock() if err := heapProfile(&prof); err != nil { debugLog("failed to write heap profile: %v", err) return @@ -616,11 +594,6 @@ func initializeConfig(cfg Config) error { if config.APIAddr == "" { config.APIAddr = apiAddress } - - if config.OOMProfileThreshold == 0 { - config.OOMProfileThreshold = 0.98 - } - return nil } @@ -639,95 +612,3 @@ func pollProfilerService(ctx context.Context, a *agent) { profilingDone <- true } } - -func readValueFromFile(ctx context.Context, file string) (int64, error) { - fh, err := os.Open(file) - if err != nil { - return 0, fmt.Errorf("failed to read file: %v", err) - } - defer fh.Close() - - fscanner := bufio.NewScanner(fh) - - var value int64 - for fscanner.Scan() { - value, err = strconv.ParseInt(fscanner.Text(), 10, 64) - if err != nil { - return 0, fmt.Errorf("failed to read number from file: %v", err) - } - break - } - return value, nil -} - -func calculateMemoryUtilization(ctx context.Context) (float32, error) { - memLimitBytes, err := readValueFromFile(ctx, memLimitFile) - if err != nil { - return 0, fmt.Errorf("failed to read value from file: %v", err) - } - - memUsedBytes, err := readValueFromFile(ctx, memUsedFile) - if err != nil { - return 0, fmt.Errorf("failed to read value from file: %v", err) - } - return float32(float64(memUsedBytes) / float64(memLimitBytes)), nil -} - -// pollMemoryUtilization periodically calculates memory utilization. When the -// utilization crosses OOMProfileThreshold, a heap profile is collected and -// uploaded. -func pollMemoryUtilization(ctx context.Context, a *agent) { - // Check HEAP profile is not disabled. - heapEnabled := false - for _, enabled := range a.profileTypes { - if enabled == pb.ProfileType_HEAP { - heapEnabled = true - break - } - } - if !heapEnabled { - debugLog("HEAP profile collection is disabled. OOM profile collection will be disabled") - return - } - - // Start ticker - tick := time.NewTicker(config.OOMCheckInterval) - var ( - utilization float32 - err error - prof bytes.Buffer - ) - for range tick.C { - utilization, err = calculateMemoryUtilization(ctx) - if err != nil { - debugLog("failed to calculate memory utilization: %v", err) - } - if utilization >= config.OOMProfileThreshold { - mutexHeap.Lock() - if err := heapProfile(&prof); err != nil { - debugLog("failed to write OOM heap profile: %v", err) - } - mutexHeap.Unlock() - req := pb.CreateOfflineProfileRequest{ - Parent: "projects/" + a.deployment.ProjectId, - Profile: &pb.Profile{ - ProfileType: pb.ProfileType_HEAP, - Deployment: &pb.Deployment{ - ProjectId: a.deployment.ProjectId, - Target: a.deployment.Target + "-event", - Labels: a.deployment.Labels, - }, - ProfileBytes: prof.Bytes(), - Labels: map[string]string{ - eventLabel: "oom", - }, - }, - } - p, err := a.client.CreateOfflineProfile(ctx, &req) - if err != nil { - debugLog("failed to upload event profile: %v", err) - } - debugLog("uploaded event profile: %s", p.Name) - } - } -}