Skip to content

Commit

Permalink
[#20676] YSQL, ASH: Update default query_id in ASH metadata
Browse files Browse the repository at this point in the history
Summary:
This diff sets a default query_id of `5` to ASH metadata so that catalog requests
are not ignored and we can know the distribution of time spent between catalog
and storage requests. The root_request_id is still empty for these catalog requests,
as there is no point in having different root_request_id for different catalog requests now.

In future, we would want to have the actual query_id for query catalog requests and
only the catalog requests that are not tied to any specific query can have this query_id
of `5`.

This diff also gets rid of the field `yb_is_ash_metadata_set` since now every outgoing
Perform RPC will have a default query_id. But this causes another problem, an idle ysql
session will continue to fill the circular buffer with `ClientRead` wait events, which is not
ideal, because this might overwrite actual useful information. We filter these out so
that it doesn't eat up space in the circular buffer.
Jira: DB-9677

Test Plan: ./yb_build.sh --java-test TestYbAsh#testCatalogRequests

Reviewers: jason

Reviewed By: jason

Subscribers: amitanand, hbhanawat, yql

Differential Revision: https://phorge.dev.yugabyte.com/D33912
  • Loading branch information
abhinab-yb committed May 7, 2024
1 parent 6965586 commit 1fbe96e
Show file tree
Hide file tree
Showing 12 changed files with 83 additions and 30 deletions.
33 changes: 28 additions & 5 deletions java/yb-pgsql/src/test/java/org/yb/pgsql/TestYbAsh.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ private void setAshConfigAndRestartCluster(
flagMap.put("ysql_yb_enable_ash", "true");
flagMap.put("ysql_yb_ash_sampling_interval_ms", String.valueOf(sampling_interval));
flagMap.put("ysql_yb_ash_sample_size", String.valueOf(sample_size));
restartClusterWithFlags(Collections.emptyMap(), flagMap);
// flagMap.put("create_initial_sys_catalog_snapshot", "true");
Map<String, String> masterFlagMap = super.getMasterFlags();
restartClusterWithFlags(masterFlagMap, flagMap);
}

private void executePgSleep(Statement statement, long seconds) throws Exception {
Expand Down Expand Up @@ -155,10 +157,9 @@ public void testUtilityStatementsQueryId() throws Exception {
}
statement.execute("TRUNCATE TABLE test_table2");
statement.execute("DROP TABLE test_table2");
// TODO: remove wait_event_component='Postgres' once ASH metadata is propagated to
// TServer
// TODO: remove wait_event_component='YSQL' once all tserver RPCs are instrumented
assertOneRow(statement, "SELECT COUNT(*) FROM " + ASH_VIEW + " WHERE query_id = 0 " +
"AND wait_event_component='Postgres'", 0);
"AND wait_event_component='YSQL'", 0);
}
}

Expand Down Expand Up @@ -213,9 +214,31 @@ public void testPgAuxInfo() throws Exception {
statement.execute(String.format("SELECT v FROM test_table WHERE k=%d", i));
}
int res = getSingleRow(statement, "SELECT COUNT(*) FROM " + ASH_VIEW +
" WHERE wait_event_component='Postgres' AND wait_event_aux IS NOT NULL")
" WHERE wait_event_component='YSQL' AND wait_event_aux IS NOT NULL")
.getLong(0).intValue();
assertEquals(res, 0);
}
}

/**
* Verify that catalog requests are sampled
*/
@Test
public void testCatalogRequests() throws Exception {
// Use small sampling interval so that we are more likely to catch catalog requests
setAshConfigAndRestartCluster(5, ASH_SAMPLE_SIZE);
int catalog_request_query_id = 5;
String catalog_read_wait_event = "CatalogRead";
try (Statement statement = connection.createStatement()) {
statement.execute("CREATE TABLE test_table(k INT, v TEXT)");
for (int i = 0; i < 100; ++i) {
statement.execute(String.format("INSERT INTO test_table VALUES(%d, 'v-%d')", i, i));
statement.execute(String.format("SELECT v FROM test_table WHERE k=%d", i));
}
int res1 = getSingleRow(statement, "SELECT COUNT(*) FROM " + ASH_VIEW +
" WHERE query_id = " + catalog_request_query_id + " OR " +
"wait_event = '" + catalog_read_wait_event + "'").getLong(0).intValue();
assertGreaterThan(res1, 0);
}
}
}
15 changes: 12 additions & 3 deletions src/postgres/src/backend/storage/ipc/procarray.c
Original file line number Diff line number Diff line change
Expand Up @@ -4155,11 +4155,20 @@ YbStorePgAshSamples(TimestampTz sample_time)
PGPROC *proc = &allProcs[pgprocno];

/*
* Don't sample prepared transactions, background workers
* and if the ASH metadata is not set.
* Don't sample prepared transactions, background workers,
* if the address family has not been set yet, or if the wait
* event is something that should ignored.
* We don't need to take lock for reading addr_family because
* we might only miss a few samples. With the default sampling
* interval of 1000ms, missed samples should be at most one per
* session. For large number of samples, this shouldn't matter much.
* We might also have some samples, where the root_request_id and
* query_id has not been set yet, but the number of such samples
* should be pretty low.
*/
if (proc->pid == 0 || proc->isBackgroundWorker ||
proc->yb_is_ash_metadata_set == 0)
proc->yb_ash_metadata.addr_family == AF_UNSPEC ||
YbAshShouldIgnoreWaitEvent(proc->wait_event_info))
continue;

if (YbAshStoreSample(proc, arrayP->numProcs, sample_time,
Expand Down
8 changes: 5 additions & 3 deletions src/postgres/src/backend/storage/lmgr/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,12 +454,15 @@ InitProcess(void)

MemSet(MyProc->yb_ash_metadata.root_request_id, 0,
sizeof(MyProc->yb_ash_metadata.root_request_id));
MyProc->yb_ash_metadata.query_id = 0;
/*
* TODO(asaha): Update the query_id for catalog calls in circular buffer
* once it's calculated
*/
MyProc->yb_ash_metadata.query_id = YBCGetQueryIdForCatalogRequests();
MemSet(MyProc->yb_ash_metadata.client_addr, 0,
sizeof(MyProc->yb_ash_metadata.client_addr));
MyProc->yb_ash_metadata.client_port = 0;
MyProc->yb_ash_metadata.addr_family = AF_UNSPEC;
MyProc->yb_is_ash_metadata_set = false;

/*
* Acquire ownership of the PGPROC's latch, so that we can use WaitLatch
Expand Down Expand Up @@ -604,7 +607,6 @@ InitAuxiliaryProcess(void)
MyProc->lwWaitMode = 0;
MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL;
MyProc->yb_is_ash_metadata_set = false;
#ifdef USE_ASSERT_CHECKING
{
int i;
Expand Down
1 change: 0 additions & 1 deletion src/postgres/src/backend/utils/misc/pg_yb_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,6 @@ YBInitPostgresBackend(
callbacks.PgstatReportWaitStart = &yb_pgstat_report_wait_start;
YBCPgAshConfig ash_config;
ash_config.metadata = &MyProc->yb_ash_metadata;
ash_config.is_metadata_set = &MyProc->yb_is_ash_metadata_set;
ash_config.yb_enable_ash = &yb_enable_ash;
IpAddressToBytes(&ash_config);
YBCInitPgGate(type_table, count, callbacks, session_id, &ash_config);
Expand Down
35 changes: 26 additions & 9 deletions src/postgres/src/backend/utils/misc/yb_ash.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ yb_ash_ExecutorStart(QueryDesc *queryDesc, int eflags)
* In case of prepared statements, the 'Parse' phase might be skipped.
* We set the ASH metadata here if it's not been set yet.
*/
if (yb_enable_ash && MyProc->yb_is_ash_metadata_set == false)
if (yb_enable_ash && MyProc->yb_ash_metadata.query_id == YBCGetQueryIdForCatalogRequests())
{
/* Query id can be zero here only if pg_stat_statements is disabled */
uint64 query_id = queryDesc->plannedstmt->queryId != 0
Expand Down Expand Up @@ -296,25 +296,24 @@ static void
yb_set_ash_metadata(uint64 query_id)
{
LWLockAcquire(&MyProc->yb_ash_metadata_lock, LW_EXCLUSIVE);

MyProc->yb_ash_metadata.query_id = query_id;
YBCGenerateAshRootRequestId(MyProc->yb_ash_metadata.root_request_id);
MyProc->yb_is_ash_metadata_set = true;

LWLockRelease(&MyProc->yb_ash_metadata_lock);
}

static void
yb_unset_ash_metadata()
{
LWLockAcquire(&MyProc->yb_ash_metadata_lock, LW_EXCLUSIVE);

/*
* when yb_is_ash_metadata_set is set to false, it ensures that we
* won't read the ASH metadata. So it's fine to not null out the values.
* TODO(asaha): add an assert
* Assert(MyProc->yb_ash_metadata.query_id != YBCGetQueryIdForCatalogRequests());
* after fixing GH#21031
*/
MyProc->yb_is_ash_metadata_set = false;

LWLockAcquire(&MyProc->yb_ash_metadata_lock, LW_EXCLUSIVE);
MyProc->yb_ash_metadata.query_id = YBCGetQueryIdForCatalogRequests();
MemSet(MyProc->yb_ash_metadata.root_request_id, 0,
sizeof(MyProc->yb_ash_metadata.root_request_id));
LWLockRelease(&MyProc->yb_ash_metadata_lock);
}

Expand Down Expand Up @@ -363,6 +362,24 @@ yb_ash_utility_query_id(const char *query, int query_len, int query_location)
redacted_query_len, 0));
}

/*
* Events such as ClientRead can take up a lot of space in the circular buffer
* if there is an idle session. We don't want to include such wait events.
* This list may increase in the future.
*/
bool
YbAshShouldIgnoreWaitEvent(uint32 wait_event_info)
{
switch (wait_event_info)
{
case WAIT_EVENT_CLIENT_READ:
return true;
default:
return false;
}
return false;
}

static void
YbAshAcquireBufferLock(bool exclusive)
{
Expand Down
4 changes: 1 addition & 3 deletions src/postgres/src/include/storage/proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,12 @@ struct PGPROC
bool ybEnteredCriticalSection;

/*
* yb_ash_metadata and yb_is_ash_metadata_set are protected by
* yb_ash_metadata_lock instead of backendLock.
* yb_ash_metadata is protected by yb_ash_metadata_lock instead of backendLock.
* TODO: Investigate if yb_ash_metadata_lock is really needed, or can we
* use backendLock itself if there is no significant drop in performance.
*/
LWLock yb_ash_metadata_lock;
YBCAshMetadata yb_ash_metadata;
bool yb_is_ash_metadata_set;
};

/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
Expand Down
1 change: 1 addition & 0 deletions src/postgres/src/include/yb_ash.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ extern void YbAshMain(Datum main_arg);

extern void YbAshInstallHooks(void);
extern void YbAshSetSessionId(uint64 session_id);
extern bool YbAshShouldIgnoreWaitEvent(uint32 wait_event_info);

extern bool YbAshStoreSample(PGPROC *proc, int num_procs,
TimestampTz sample_time,
Expand Down
7 changes: 4 additions & 3 deletions src/yb/ash/wait_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,10 @@ YB_DEFINE_TYPED_ENUM(WaitStateCode, uint32_t,
// fixed query-ids to identify these background tasks.
YB_DEFINE_TYPED_ENUM(FixedQueryId, uint8_t,
((kQueryIdForLogAppender, 1))
(kQueryIdForFlush)
(kQueryIdForCompaction)
(kQueryIdForRaftUpdateConsensus)
((kQueryIdForFlush, 2))
((kQueryIdForCompaction, 3))
((kQueryIdForRaftUpdateConsensus, 4))
((kQueryIdForCatalogRequests, 5))
);

YB_DEFINE_TYPED_ENUM(WaitStateType, uint8_t,
Expand Down
3 changes: 1 addition & 2 deletions src/yb/yql/pggate/pg_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ struct ResponseReadyTraits<Result<rpc::CallData>> {
};

void AshMetadataToPB(const YBCPgAshConfig& ash_config, tserver::PgPerformOptionsPB* options) {
// Don't send ASH metadata if it's not set
if (!(*ash_config.yb_enable_ash) || !(*ash_config.is_metadata_set)) {
if (!(*ash_config.yb_enable_ash)) {
return;
}

Expand Down
4 changes: 4 additions & 0 deletions src/yb/yql/pggate/util/ybc_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,10 @@ const char* YBCGetWaitEventType(uint32_t wait_event_info) {
return NoPrefixName(GetWaitStateType(static_cast<ash::WaitStateCode>(wait_event)));
}

uint8_t YBCGetQueryIdForCatalogRequests() {
return static_cast<uint8_t>(ash::FixedQueryId::kQueryIdForCatalogRequests);
}

} // extern "C"

} // namespace yb::pggate
1 change: 1 addition & 0 deletions src/yb/yql/pggate/util/ybc_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ const char* YBCGetWaitEventName(uint32_t wait_event_info);
const char* YBCGetWaitEventClass(uint32_t wait_event_info);
const char* YBCGetWaitEventComponent(uint32_t wait_event_info);
const char* YBCGetWaitEventType(uint32_t wait_event_info);
uint8_t YBCGetQueryIdForCatalogRequests();

#ifdef __cplusplus
} // extern "C"
Expand Down
1 change: 0 additions & 1 deletion src/yb/yql/pggate/ybc_pg_typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,6 @@ typedef struct AshSample {
// A struct to pass ASH postgres config to PgClient
typedef struct PgAshConfig {
YBCAshMetadata* metadata;
bool* is_metadata_set;
bool* yb_enable_ash;
unsigned char yql_endpoint_tserver_uuid[16];
// length of host should be equal to INET6_ADDRSTRLEN
Expand Down

0 comments on commit 1fbe96e

Please sign in to comment.