From 1b7b8ea4457a9c8e66383b58403801f9b1f70faa Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Thu, 18 Aug 2022 20:54:48 +0800 Subject: [PATCH 1/2] retry on goaway. --- pkg/errorutil/ignore.go | 7 ++++++- pkg/errorutil/ignore_test.go | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/errorutil/ignore.go b/pkg/errorutil/ignore.go index 06212b2d8a4..51a6bccf581 100644 --- a/pkg/errorutil/ignore.go +++ b/pkg/errorutil/ignore.go @@ -73,12 +73,17 @@ func IsRetryableEtcdError(err error) bool { return true default: } - // when the PD instance was deleted from the PD cluster, it may meet error with `raft:stopped`, + // when the PD instance was deleted from the PD cluster, it may meet different errors. // retry on such error make cdc robust to PD / ETCD cluster member removal. // we should tolerant such case to make cdc robust to PD / ETCD cluster member change. // see: https://github.com/etcd-io/etcd/blob/ae36a577d7be/raft/node.go#L35 if strings.Contains(etcdErr.Error(), "raft: stopped") { return true } + // see: https://github.com/pingcap/tiflow/issues/6720 + if strings.Contains(etcdErr.Error(), "error reading from server: EOF") && + strings.Contains(etcdErr.Error(), "received prior goaway") { + return true + } return false } diff --git a/pkg/errorutil/ignore_test.go b/pkg/errorutil/ignore_test.go index 794d8f93a1e..1cbe6ce9773 100644 --- a/pkg/errorutil/ignore_test.go +++ b/pkg/errorutil/ignore_test.go @@ -60,6 +60,9 @@ func TestIsRetryableEtcdError(t *testing.T) { {v3rpc.ErrTimeoutDueToLeaderFail, true}, {v3rpc.ErrNoSpace, true}, {raft.ErrStopped, true}, + {errors.New("rpc error: code = Unavailable desc = closing transport due to: " + + "connection error: desc = \\\"error reading from server: EOF\\\", " + + "received prior goaway: code: NO_ERROR\""), true}, } for _, item := range cases { From d1a8743c23cd25f60b57c3685045a5a891af1949 Mon Sep 17 00:00:00 2001 From: 3AceShowHand Date: Thu, 18 Aug 2022 21:14:55 +0800 Subject: [PATCH 2/2] same as what 
Google does. --- pkg/errorutil/ignore.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/errorutil/ignore.go b/pkg/errorutil/ignore.go index 51a6bccf581..0b5d5fe1d79 100644 --- a/pkg/errorutil/ignore.go +++ b/pkg/errorutil/ignore.go @@ -81,8 +81,7 @@ func IsRetryableEtcdError(err error) bool { return true } // see: https://github.com/pingcap/tiflow/issues/6720 - if strings.Contains(etcdErr.Error(), "error reading from server: EOF") && - strings.Contains(etcdErr.Error(), "received prior goaway") { + if strings.Contains(etcdErr.Error(), "received prior goaway: code: NO_ERROR") { return true } return false