Skip to content

Commit

Permalink
feat: expose GFE latency metrics (#1473)
Browse files Browse the repository at this point in the history
* GFE Latency

* feat: GFE Latency

* Busy waiting for header missing count

* Thread sleep before checking view Data

* lint errors and changing method signatures in unit test

* lint changes

* Adding pattern for Admin calls

* Correcting regex pattern
  • Loading branch information
KiranmayiB committed Oct 7, 2021
1 parent ff2a713 commit de82f78
Show file tree
Hide file tree
Showing 5 changed files with 675 additions and 2 deletions.
7 changes: 6 additions & 1 deletion google-cloud-spanner/pom.xml
Expand Up @@ -98,7 +98,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<configuration>
<ignoredDependencies>io.grpc:grpc-protobuf-lite,org.hamcrest:hamcrest,org.hamcrest:hamcrest-core,com.google.errorprone:error_prone_annotations,org.openjdk.jmh:jmh-generator-annprocess,com.google.api.grpc:grpc-google-cloud-spanner-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-instance-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-database-v1,javax.annotation:javax.annotation-api</ignoredDependencies>
<ignoredDependencies>io.grpc:grpc-protobuf-lite,org.hamcrest:hamcrest,org.hamcrest:hamcrest-core,com.google.errorprone:error_prone_annotations,org.openjdk.jmh:jmh-generator-annprocess,com.google.api.grpc:grpc-google-cloud-spanner-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-instance-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-database-v1,javax.annotation:javax.annotation-api,io.opencensus:opencensus-impl</ignoredDependencies>
</configuration>
</plugin>
</plugins>
Expand Down Expand Up @@ -179,6 +179,11 @@
<groupId>io.opencensus</groupId>
<artifactId>opencensus-contrib-grpc-util</artifactId>
</dependency>
<dependency>
<groupId>io.opencensus</groupId>
<artifactId>opencensus-impl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-oauth2-http</artifactId>
Expand Down
@@ -0,0 +1,141 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spanner.spi.v1;

import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.DATABASE_ID;
import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.INSTANCE_ID;
import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.METHOD;
import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.PROJECT_ID;
import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.SPANNER_GFE_HEADER_MISSING_COUNT;
import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.SPANNER_GFE_LATENCY;

import io.grpc.CallOptions;
import io.grpc.Channel;
import io.grpc.ClientCall;
import io.grpc.ClientInterceptor;
import io.grpc.ForwardingClientCall.SimpleForwardingClientCall;
import io.grpc.ForwardingClientCallListener.SimpleForwardingClientCallListener;
import io.grpc.Metadata;
import io.grpc.MethodDescriptor;
import io.opencensus.stats.MeasureMap;
import io.opencensus.stats.Stats;
import io.opencensus.stats.StatsRecorder;
import io.opencensus.tags.TagContext;
import io.opencensus.tags.TagValue;
import io.opencensus.tags.Tagger;
import io.opencensus.tags.Tags;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Intercepts all gRPC calls to extract server-timing header. Captures GFE Latency and GFE Header
* Missing count metrics.
*/
class HeaderInterceptor implements ClientInterceptor {

private static final Metadata.Key<String> SERVER_TIMING_HEADER_KEY =
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER);
private static final Pattern SERVER_TIMING_HEADER_PATTERN = Pattern.compile(".*dur=(?<dur>\\d+)");
private static final Metadata.Key<String> GOOGLE_CLOUD_RESOURCE_PREFIX_KEY =
Metadata.Key.of("google-cloud-resource-prefix", Metadata.ASCII_STRING_MARSHALLER);
private static final Pattern GOOGLE_CLOUD_RESOURCE_PREFIX_PATTERN =
Pattern.compile(
".*projects/(?<project>\\p{ASCII}[^/]*)(/instances/(?<instance>\\p{ASCII}[^/]*))?(/databases/(?<database>\\p{ASCII}[^/]*))?");

// Get the global singleton Tagger object.
private static final Tagger TAGGER = Tags.getTagger();
private static final StatsRecorder STATS_RECORDER = Stats.getStatsRecorder();

private static final Logger LOGGER = Logger.getLogger(HeaderInterceptor.class.getName());
private static final Level LEVEL = Level.INFO;

HeaderInterceptor() {}

@Override
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
return new SimpleForwardingClientCall<ReqT, RespT>(next.newCall(method, callOptions)) {
@Override
public void start(Listener<RespT> responseListener, Metadata headers) {
TagContext tagContext = getTagContext(headers, method.getFullMethodName());
super.start(
new SimpleForwardingClientCallListener<RespT>(responseListener) {
@Override
public void onHeaders(Metadata metadata) {

processHeader(metadata, tagContext);
super.onHeaders(metadata);
}
},
headers);
}
};
}

private void processHeader(Metadata metadata, TagContext tagContext) {
MeasureMap measureMap = STATS_RECORDER.newMeasureMap();
if (metadata.get(SERVER_TIMING_HEADER_KEY) != null) {
String serverTiming = metadata.get(SERVER_TIMING_HEADER_KEY);
Matcher matcher = SERVER_TIMING_HEADER_PATTERN.matcher(serverTiming);
if (matcher.find()) {
try {
long latency = Long.parseLong(matcher.group("dur"));
measureMap.put(SPANNER_GFE_LATENCY, latency).record(tagContext);
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 0L).record(tagContext);
} catch (NumberFormatException e) {
LOGGER.log(LEVEL, "Invalid server-timing object in header", matcher.group("dur"));
}
}
} else {
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 1L).record(tagContext);
}
}

private TagContext getTagContext(
String method, String projectId, String instanceId, String databaseId) {
return TAGGER
.currentBuilder()
.putLocal(PROJECT_ID, TagValue.create(projectId))
.putLocal(INSTANCE_ID, TagValue.create(instanceId))
.putLocal(DATABASE_ID, TagValue.create(databaseId))
.putLocal(METHOD, TagValue.create(method))
.build();
}

private TagContext getTagContext(Metadata headers, String method) {
String projectId = "undefined-project";
String instanceId = "undefined-database";
String databaseId = "undefined-database";
if (headers.get(GOOGLE_CLOUD_RESOURCE_PREFIX_KEY) != null) {
String googleResourcePrefix = headers.get(GOOGLE_CLOUD_RESOURCE_PREFIX_KEY);
Matcher matcher = GOOGLE_CLOUD_RESOURCE_PREFIX_PATTERN.matcher(googleResourcePrefix);
if (matcher.find()) {
projectId = matcher.group("project");
if (matcher.group("instance") != null) {
instanceId = matcher.group("instance");
}
if (matcher.group("database") != null) {
databaseId = matcher.group("database");
}
} else {
LOGGER.log(LEVEL, "Error parsing google cloud resource header: " + googleResourcePrefix);
}
}
return getTagContext(method, projectId, instanceId, databaseId);
}
}
Expand Up @@ -33,7 +33,8 @@ public class SpannerInterceptorProvider implements GrpcInterceptorProvider {
private static final List<ClientInterceptor> defaultInterceptors =
ImmutableList.of(
new SpannerErrorInterceptor(),
new LoggingInterceptor(Logger.getLogger(GapicSpannerRpc.class.getName()), Level.FINER));
new LoggingInterceptor(Logger.getLogger(GapicSpannerRpc.class.getName()), Level.FINER),
new HeaderInterceptor());

private final List<ClientInterceptor> clientInterceptors;

Expand Down
@@ -0,0 +1,118 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spanner.spi.v1;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import io.opencensus.stats.Aggregation;
import io.opencensus.stats.Aggregation.Distribution;
import io.opencensus.stats.Aggregation.Sum;
import io.opencensus.stats.BucketBoundaries;
import io.opencensus.stats.Measure.MeasureLong;
import io.opencensus.stats.Stats;
import io.opencensus.stats.View;
import io.opencensus.stats.ViewManager;
import io.opencensus.tags.TagKey;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

@VisibleForTesting
public class SpannerRpcViews {

/** Unit to represent milliseconds. */
private static final String MILLISECOND = "ms";
/** Unit to represent counts. */
private static final String COUNT = "1";

/** TagKeys */
public static final TagKey METHOD = TagKey.create("method");

public static final TagKey PROJECT_ID = TagKey.create("project_id");
public static final TagKey INSTANCE_ID = TagKey.create("instance_id");
public static final TagKey DATABASE_ID = TagKey.create("database");

/** GFE t4t7 latency extracted from server-timing header. */
public static final MeasureLong SPANNER_GFE_LATENCY =
MeasureLong.create(
"cloud.google.com/java/spanner/gfe_latency",
"Latency between Google's network receiving an RPC and reading back the first byte of the response",
MILLISECOND);
/** Number of responses without the server-timing header. */
public static final MeasureLong SPANNER_GFE_HEADER_MISSING_COUNT =
MeasureLong.create(
"cloud.google.com/java/spanner/gfe_header_missing_count",
"Number of RPC responses received without the server-timing header, most likely means that the RPC never reached Google's network",
COUNT);

static final List<Double> RPC_MILLIS_BUCKET_BOUNDARIES =
Collections.unmodifiableList(
Arrays.asList(
0.0, 0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 13.0,
16.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0, 200.0, 250.0,
300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0,
100000.0));
static final Aggregation AGGREGATION_WITH_MILLIS_HISTOGRAM =
Distribution.create(BucketBoundaries.create(RPC_MILLIS_BUCKET_BOUNDARIES));
static final View SPANNER_GFE_LATENCY_VIEW =
View.create(
View.Name.create("cloud.google.com/java/spanner/gfe_latency"),
"Latency between Google's network receiving an RPC and reading back the first byte of the response",
SPANNER_GFE_LATENCY,
AGGREGATION_WITH_MILLIS_HISTOGRAM,
ImmutableList.of(METHOD, PROJECT_ID, INSTANCE_ID, DATABASE_ID));

private static final Aggregation SUM = Sum.create();
static final View SPANNER_GFE_HEADER_MISSING_COUNT_VIEW =
View.create(
View.Name.create("cloud.google.com/java/spanner/gfe_header_missing_count"),
"Number of RPC responses received without the server-timing header, most likely means that the RPC never reached Google's network",
SPANNER_GFE_HEADER_MISSING_COUNT,
SUM,
ImmutableList.of(METHOD, PROJECT_ID, INSTANCE_ID, DATABASE_ID));

public static ViewManager viewManager = Stats.getViewManager();

/**
* Register views for GFE metrics, including gfe_latency and gfe_header_missing_count. gfe_latency
* measures the latency between Google's network receives an RPC and reads back the first byte of
* the response. gfe_header_missing_count is a counter of the number of RPC responses without a
* server-timing header.
*/
@VisibleForTesting
public static void registerGfeLatencyAndHeaderMissingCountViews() {
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW);
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW);
}

/**
* Register GFE Latency view. gfe_latency measures the latency between Google's network receives
* an RPC and reads back the first byte of the response.
*/
@VisibleForTesting
public static void registerGfeLatencyView() {
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW);
}

/**
* Register GFE Header Missing Count view. gfe_header_missing_count is a counter of the number of
* RPC responses without a server-timing header.
*/
@VisibleForTesting
public static void registerGfeHeaderMissingCountView() {
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW);
}
}

0 comments on commit de82f78

Please sign in to comment.