Skip to content

Commit

Permalink
[BACKPORT 2.14][#20330] docdb: fix retryable request timeout for sysc…
Browse files Browse the repository at this point in the history
…atalog tablet

Summary:
Original commit: cae24a3 / D31196
SysCatalog tablet is in YQL table type and its retryable requests retain duration is 60s (from D29599). Any YSql DDL can be rejected with `request is too old` error after 60s from the time it was sent.

As SysCatalog tablet can handle catalog change requests from both YQL and YSQL, its retryable request retain duration should be set to max(ysql client timeout, yql client timeout).
Jira: DB-9316

Test Plan:
RetryableRequestTest.TestRetryableRequestTimeoutSecs
RetryableRequestTest.SysCatalogRequestTimeoutSecs

Reviewers: timur, bogdan, rthallam

Reviewed By: rthallam

Subscribers: ybase, bogdan

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D31432
  • Loading branch information
Huqicheng committed Jan 10, 2024
1 parent 5619f72 commit a6541ad
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 5 deletions.
5 changes: 5 additions & 0 deletions src/yb/client/session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,11 @@ int YsqlClientReadWriteTimeoutMs() {
: FLAGS_ysql_client_read_write_timeout_ms);
}

int SysCatalogRetryableRequestTimeoutSecs() {
return std::max(RetryableRequestTimeoutSecs(TableType::PGSQL_TABLE_TYPE),
RetryableRequestTimeoutSecs(TableType::YQL_TABLE_TYPE));
}

int RetryableRequestTimeoutSecs(TableType table_type) {
const int client_timeout_ms = table_type == TableType::PGSQL_TABLE_TYPE
? YsqlClientReadWriteTimeoutMs()
Expand Down
1 change: 1 addition & 0 deletions src/yb/client/session.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ class YBSession : public std::enable_shared_from_this<YBSession> {
bool ShouldSessionRetryError(const Status& status);

int YsqlClientReadWriteTimeoutMs();
int SysCatalogRetryableRequestTimeoutSecs();
int RetryableRequestTimeoutSecs(TableType table_type);

} // namespace client
Expand Down
61 changes: 61 additions & 0 deletions src/yb/integration-tests/retryable_request-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include "yb/integration-tests/mini_cluster.h"
#include "yb/integration-tests/yb_table_test_base.h"

#include "yb/master/catalog_manager_if.h"
#include "yb/master/sys_catalog.h"
#include "yb/tablet/tablet.h"
#include "yb/tablet/tablet_peer.h"

Expand All @@ -43,6 +45,7 @@ DECLARE_int32(client_read_write_timeout_ms);
DECLARE_int32(retryable_request_timeout_secs);
DECLARE_bool(TEST_asyncrpc_finished_set_timedout);
DECLARE_bool(TEST_pause_before_replicate_batch);
DECLARE_int32(ysql_client_read_write_timeout_ms);

namespace yb {
namespace integration_tests {
Expand Down Expand Up @@ -91,6 +94,10 @@ class SingleServerRetryableRequestTest : public RetryableRequestTest {
}

size_t num_tablet_servers() override { return 1; }

std::shared_ptr<consensus::RaftConsensus> GetMasterRaftConsensus() {
return mini_cluster_->mini_master()->catalog_manager().tablet_peer()->shared_raft_consensus();
}
};

TEST_F_EX(RetryableRequestTest, YqlRequestTimeoutSecs, SingleServerRetryableRequestTest) {
Expand Down Expand Up @@ -119,6 +126,60 @@ TEST_F_EX(RetryableRequestTest, YqlRequestTimeoutSecs, SingleServerRetryableRequ
DeleteTable();
}

TEST_F_EX(RetryableRequestTest, TestRetryableRequestTimeoutSecs, SingleServerRetryableRequestTest) {
ANNOTATE_UNPROTECTED_WRITE(FLAGS_retryable_request_timeout_secs) = 660;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 60000;
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::PGSQL_TABLE_TYPE), 600);
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::YQL_TABLE_TYPE), 60);
ASSERT_EQ(client::SysCatalogRetryableRequestTimeoutSecs(), 600);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 601000;
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::PGSQL_TABLE_TYPE), 601);
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::YQL_TABLE_TYPE), 601);
ASSERT_EQ(client::SysCatalogRetryableRequestTimeoutSecs(), 601);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 661000;
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::PGSQL_TABLE_TYPE), 660);
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::YQL_TABLE_TYPE), 660);
ASSERT_EQ(client::SysCatalogRetryableRequestTimeoutSecs(), 660);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_client_read_write_timeout_ms) = 60000;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 60000;
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::PGSQL_TABLE_TYPE), 60);
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::YQL_TABLE_TYPE), 60);
ASSERT_EQ(client::SysCatalogRetryableRequestTimeoutSecs(), 60);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_client_read_write_timeout_ms) = 661000;
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::PGSQL_TABLE_TYPE), 660);
ASSERT_EQ(client::RetryableRequestTimeoutSecs(TableType::YQL_TABLE_TYPE), 60);
ASSERT_EQ(client::SysCatalogRetryableRequestTimeoutSecs(), 660);
}

TEST_F_EX(RetryableRequestTest, SysCatalogRequestTimeoutSecs, SingleServerRetryableRequestTest) {
auto master = mini_cluster_->mini_master();
ANNOTATE_UNPROTECTED_WRITE(FLAGS_retryable_request_timeout_secs) = 660;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 60000;
ASSERT_OK(master->Restart());
ASSERT_EQ(GetMasterRaftConsensus()->TEST_RetryableRequestTimeoutSecs(), 600);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 601000;
ASSERT_OK(master->Restart());
ASSERT_EQ(GetMasterRaftConsensus()->TEST_RetryableRequestTimeoutSecs(), 601);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 661000;
ASSERT_OK(master->Restart());
ASSERT_EQ(GetMasterRaftConsensus()->TEST_RetryableRequestTimeoutSecs(), 660);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_client_read_write_timeout_ms) = 60000;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_client_read_write_timeout_ms) = 60000;
ASSERT_OK(master->Restart());
ASSERT_EQ(GetMasterRaftConsensus()->TEST_RetryableRequestTimeoutSecs(), 60);

ANNOTATE_UNPROTECTED_WRITE(FLAGS_ysql_client_read_write_timeout_ms) = 661000;
ASSERT_OK(master->Restart());
ASSERT_EQ(GetMasterRaftConsensus()->TEST_RetryableRequestTimeoutSecs(), 660);
}

TEST_F_EX(RetryableRequestTest, TestRetryableRequestTooOld, SingleServerRetryableRequestTest) {
auto* tablet_server = mini_cluster()->mini_tablet_server(0);
const auto tablet_id = ASSERT_RESULT(GetOnlyTabletId(table_.name()));
Expand Down
8 changes: 5 additions & 3 deletions src/yb/master/sys_catalog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@

#include <boost/optional.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <rapidjson/document.h>

#include "yb/client/client.h"
Expand All @@ -59,6 +58,7 @@
#include "yb/consensus/log_anchor_registry.h"
#include "yb/consensus/opid_util.h"
#include "yb/consensus/quorum_util.h"
#include "yb/consensus/retryable_requests.h"
#include "yb/consensus/state_change_context.h"

#include "yb/docdb/doc_rowwise_iterator.h"
Expand Down Expand Up @@ -563,6 +563,8 @@ Status SysCatalogTable::OpenTablet(const scoped_refptr<tablet::RaftGroupMetadata
tablet::TabletPtr tablet;
scoped_refptr<Log> log;
consensus::ConsensusBootstrapInfo consensus_info;
consensus::RetryableRequests retryable_requests(LogPrefix());
retryable_requests.SetServerClock(master_->clock());
RETURN_NOT_OK(tablet_peer()->SetBootstrapping());
tablet::TabletOptions tablet_options;

Expand Down Expand Up @@ -615,7 +617,7 @@ Status SysCatalogTable::OpenTablet(const scoped_refptr<tablet::RaftGroupMetadata
.listener = tablet_peer()->status_listener(),
.append_pool = append_pool(),
.allocation_pool = allocation_pool_.get(),
.retryable_requests = nullptr,
.retryable_requests = &retryable_requests,
};
RETURN_NOT_OK(BootstrapTablet(data, &tablet, &log, &consensus_info));

Expand All @@ -633,7 +635,7 @@ Status SysCatalogTable::OpenTablet(const scoped_refptr<tablet::RaftGroupMetadata
tablet->GetTabletMetricsEntity(),
raft_pool(),
tablet_prepare_pool(),
nullptr /* retryable_requests */,
&retryable_requests,
nullptr /* consensus_meta */,
multi_raft_manager_.get()),
"Failed to Init() TabletPeer");
Expand Down
7 changes: 5 additions & 2 deletions src/yb/tablet/tablet_bootstrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
#include "yb/gutil/strings/substitute.h"
#include "yb/gutil/thread_annotations.h"

#include "yb/master/sys_catalog_constants.h"
#include "yb/rpc/rpc_fwd.h"

#include "yb/tablet/tablet_fwd.h"
Expand Down Expand Up @@ -543,8 +544,10 @@ class TabletBootstrap {
const bool has_blocks = VERIFY_RESULT(OpenTablet());

if (data_.retryable_requests) {
data_.retryable_requests->SetRequestTimeout(
client::RetryableRequestTimeoutSecs(tablet_->table_type()));
const auto retryable_request_timeout_secs = tablet_id == master::kSysCatalogTabletId
? client::SysCatalogRetryableRequestTimeoutSecs()
: client::RetryableRequestTimeoutSecs(tablet_->table_type());
data_.retryable_requests->SetRequestTimeout(retryable_request_timeout_secs);
}

if (FLAGS_TEST_dump_docdb_before_tablet_bootstrap) {
Expand Down

0 comments on commit a6541ad

Please sign in to comment.