diff --git a/pkg/errorutil/ignore.go b/pkg/errorutil/ignore.go index 06212b2d8a4..0b5d5fe1d79 100644 --- a/pkg/errorutil/ignore.go +++ b/pkg/errorutil/ignore.go @@ -73,12 +73,16 @@ func IsRetryableEtcdError(err error) bool { return true default: } - // when the PD instance was deleted from the PD cluster, it may meet error with `raft:stopped`, + // when the PD instance was deleted from the PD cluster, it may meet different errors. // retry on such error make cdc robust to PD / ETCD cluster member removal. // we should tolerant such case to make cdc robust to PD / ETCD cluster member change. // see: https://github.com/etcd-io/etcd/blob/ae36a577d7be/raft/node.go#L35 if strings.Contains(etcdErr.Error(), "raft: stopped") { return true } + // see: https://github.com/pingcap/tiflow/issues/6720 + if strings.Contains(etcdErr.Error(), "received prior goaway: code: NO_ERROR") { + return true + } return false } diff --git a/pkg/errorutil/ignore_test.go b/pkg/errorutil/ignore_test.go index 794d8f93a1e..1cbe6ce9773 100644 --- a/pkg/errorutil/ignore_test.go +++ b/pkg/errorutil/ignore_test.go @@ -60,6 +60,9 @@ func TestIsRetryableEtcdError(t *testing.T) { {v3rpc.ErrTimeoutDueToLeaderFail, true}, {v3rpc.ErrNoSpace, true}, {raft.ErrStopped, true}, + {errors.New("rpc error: code = Unavailable desc = closing transport due to: " + + "connection error: desc = \\\"error reading from server: EOF\\\", " + + "received prior goaway: code: NO_ERROR\""), true}, } for _, item := range cases {