diff --git a/compute/metadata/metadata.go b/compute/metadata/metadata.go
index 545bd9d379c..646950811c2 100644
--- a/compute/metadata/metadata.go
+++ b/compute/metadata/metadata.go
@@ -32,6 +32,8 @@ import (
 	"strings"
 	"sync"
 	"time"
+
+	"github.com/googleapis/gax-go/v2"
 )
 
 const (
@@ -282,6 +284,7 @@ func NewClient(c *http.Client) *Client {
 // getETag returns a value from the metadata service as well as the associated ETag.
 // This func is otherwise equivalent to Get.
 func (c *Client) getETag(suffix string) (value, etag string, err error) {
+	ctx := context.TODO()
 	// Using a fixed IP makes it very difficult to spoof the metadata service in
 	// a container, which is an important use-case for local testing of cloud
 	// deployments. To enable spoofing of the metadata service, the environment
@@ -304,8 +307,30 @@ func (c *Client) getETag(suffix string) (value, etag string, err error) {
 	}
 	req.Header.Set("Metadata-Flavor", "Google")
 	req.Header.Set("User-Agent", userAgent)
-	res, err := c.hc.Do(req)
-	if err != nil {
-		return "", "", err
-	}
+	var res *http.Response
+	retryer := newRetryer()
+	for {
+		var err error
+		res, err = c.hc.Do(req)
+		// Do returns a nil response for most errors, so only read the status
+		// code from a non-nil response.
+		var code int
+		if res != nil {
+			code = res.StatusCode
+		}
+		delay, shouldRetry := retryer.Retry(code, err)
+		if !shouldRetry {
+			if err != nil {
+				return "", "", err
+			}
+			break
+		}
+		if err == nil {
+			// Retrying after a 5xx response: close its body before the next attempt.
+			res.Body.Close()
+		}
+		if err := gax.Sleep(ctx, delay); err != nil {
+			return "", "", err
+		}
+	}
 	defer res.Body.Close()
diff --git a/compute/metadata/retry.go b/compute/metadata/retry.go
new file mode 100644
index 00000000000..e290452234e
--- /dev/null
+++ b/compute/metadata/retry.go
@@ -0,0 +1,93 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metadata
+
+import (
+	"io"
+	"time"
+
+	"github.com/googleapis/gax-go/v2"
+)
+
+const (
+	// maxRetryAttempts is the number of retries a single metadataRetryer grants.
+	maxRetryAttempts = 5
+)
+
+var (
+	// syscallRetryable reports whether err is a retryable socket-level error.
+	// The default never retries; retry_linux.go overrides it on Linux.
+	syscallRetryable = func(err error) bool { return false }
+)
+
+// newRetryer returns a retryer whose pauses come from a gax.Backoff that
+// starts at 100ms.
+func newRetryer() *metadataRetryer {
+	return &metadataRetryer{bo: &gax.Backoff{Initial: 100 * time.Millisecond}}
+}
+
+// backoff supplies the pause to wait before the next retry.
+type backoff interface {
+	Pause() time.Duration
+}
+
+// metadataRetryer tracks how many retries have been granted and decides
+// whether another one is allowed.
+type metadataRetryer struct {
+	bo       backoff
+	attempts int
+}
+
+// Retry reports whether a request that produced the given HTTP status and
+// error should be retried and, if so, how long to wait first. It returns
+// (0, false) for non-retryable outcomes and once maxRetryAttempts retries
+// have already been granted.
+func (r *metadataRetryer) Retry(status int, err error) (time.Duration, bool) {
+	retryOk := shouldRetry(status, err)
+	if !retryOk {
+		return 0, false
+	}
+	if r.attempts == maxRetryAttempts {
+		return 0, false
+	}
+	r.attempts++
+	return r.bo.Pause(), true
+}
+
+// shouldRetry reports whether the status/error pair is transient: any 5xx
+// status, io.ErrUnexpectedEOF, an error accepted by syscallRetryable, or an
+// error whose Temporary method returns true. Wrapped errors are unwrapped
+// and checked recursively.
+func shouldRetry(status int, err error) bool {
+	if 500 <= status && status <= 599 {
+		return true
+	}
+	if err == io.ErrUnexpectedEOF {
+		return true
+	}
+	// Transient network errors should be retried.
+	if syscallRetryable(err) {
+		return true
+	}
+	if err, ok := err.(interface{ Temporary() bool }); ok {
+		if err.Temporary() {
+			return true
+		}
+	}
+	if err, ok := err.(interface{ Unwrap() error }); ok {
+		return shouldRetry(status, err.Unwrap())
+	}
+	return false
+}
diff --git a/compute/metadata/retry_linux.go b/compute/metadata/retry_linux.go
new file mode 100644
index 00000000000..6e90cb85ba7
--- /dev/null
+++ b/compute/metadata/retry_linux.go
@@ -0,0 +1,25 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package metadata
+
+import "syscall"
+
+func init() {
+	// Initialize syscallRetryable to return true on transient socket-level
+	// errors. These errors are specific to Linux.
+	syscallRetryable = func(err error) bool { return err == syscall.ECONNRESET || err == syscall.ECONNREFUSED }
+}
diff --git a/compute/metadata/retry_linux_test.go b/compute/metadata/retry_linux_test.go
new file mode 100644
index 00000000000..a046c697357
--- /dev/null
+++ b/compute/metadata/retry_linux_test.go
@@ -0,0 +1,39 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package metadata
+
+import (
+	"syscall"
+	"testing"
+)
+
+func TestMetadataRetryerLinux(t *testing.T) {
+	retryer := metadataRetryer{bo: constantBackoff{}}
+
+	t.Run("retry on syscall.ECONNRESET", func(t *testing.T) {
+		_, shouldRetry := retryer.Retry(400, syscall.ECONNRESET)
+		if !shouldRetry {
+			t.Fatal("retryer.Retry(400, syscall.ECONNRESET) = false, want true")
+		}
+	})
+	t.Run("retry on syscall.ECONNREFUSED", func(t *testing.T) {
+		_, shouldRetry := retryer.Retry(400, syscall.ECONNREFUSED)
+		if !shouldRetry {
+			t.Fatal("retryer.Retry(400, syscall.ECONNREFUSED) = false, want true")
+		}
+	})
+}
diff --git a/compute/metadata/retry_test.go b/compute/metadata/retry_test.go
new file mode 100644
index 00000000000..5908ea07933
--- /dev/null
+++ b/compute/metadata/retry_test.go
@@ -0,0 +1,126 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metadata
+
+import (
+	"io"
+	"testing"
+	"time"
+)
+
+type constantBackoff struct{}
+
+func (b constantBackoff) Pause() time.Duration { return 100 }
+
+type errTemp struct{}
+
+func (e errTemp) Error() string { return "temporary error" }
+
+func (e errTemp) Temporary() bool { return true }
+
+type errWrapped struct {
+	e error
+}
+
+func (e errWrapped) Error() string { return "unwrap me to get more context" }
+
+func (e errWrapped) Unwrap() error { return e.e }
+
+func TestMetadataRetryer(t *testing.T) {
+	tests := []struct {
+		name            string
+		code            int
+		err             error
+		wantDelay       time.Duration
+		wantShouldRetry bool
+	}{
+		{
+			name:            "retry on 500",
+			code:            500,
+			wantDelay:       100,
+			wantShouldRetry: true,
+		},
+		{
+			name:            "don't retry on 200",
+			code:            200,
+			wantDelay:       0,
+			wantShouldRetry: false,
+		},
+		{
+			name:            "don't retry on 400",
+			code:            400,
+			err:             io.EOF,
+			wantDelay:       0,
+			wantShouldRetry: false,
+		},
+		{
+			name:            "retry on io.ErrUnexpectedEOF",
+			code:            400,
+			err:             io.ErrUnexpectedEOF,
+			wantDelay:       100,
+			wantShouldRetry: true,
+		},
+		{
+			name:            "retry on temporary error",
+			code:            400,
+			err:             errTemp{},
+			wantDelay:       100,
+			wantShouldRetry: true,
+		},
+		{
+			name:            "retry on wrapped temporary error",
+			code:            400,
+			err:             errWrapped{errTemp{}},
+			wantDelay:       100,
+			wantShouldRetry: true,
+		},
+		{
+			name:            "don't retry on wrapped io.EOF",
+			code:            400,
+			err:             errWrapped{io.EOF},
+			wantDelay:       0,
+			wantShouldRetry: false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			retryer := metadataRetryer{bo: constantBackoff{}}
+			delay, shouldRetry := retryer.Retry(tc.code, tc.err)
+			if delay != tc.wantDelay {
+				t.Fatalf("retryer.Retry(%v, %v) = %v, want %v", tc.code, tc.err, delay, tc.wantDelay)
+			}
+			if shouldRetry != tc.wantShouldRetry {
+				t.Fatalf("retryer.Retry(%v, %v) = %v, want %v", tc.code, tc.err, shouldRetry, tc.wantShouldRetry)
+			}
+		})
+	}
+}
+
+func TestMetadataRetryerAttempts(t *testing.T) {
+	retryer := metadataRetryer{bo: constantBackoff{}}
+	for i := 1; i <= 6; i++ {
+		_, shouldRetry := retryer.Retry(500, nil)
+		if i == 6 {
+			if shouldRetry {
+				t.Fatal("an error should only be retried 5 times")
+			}
+			break
+		}
+		if !shouldRetry {
+			t.Fatalf("retryer.Retry(500, nil) = false, want true")
+		}
+	}
+}