Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: GFE Latency #1473

Merged
merged 9 commits into from Oct 7, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 6 additions & 1 deletion google-cloud-spanner/pom.xml
Expand Up @@ -98,7 +98,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<configuration>
<ignoredDependencies>io.grpc:grpc-protobuf-lite,org.hamcrest:hamcrest,org.hamcrest:hamcrest-core,com.google.errorprone:error_prone_annotations,org.openjdk.jmh:jmh-generator-annprocess,com.google.api.grpc:grpc-google-cloud-spanner-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-instance-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-database-v1,javax.annotation:javax.annotation-api</ignoredDependencies>
<ignoredDependencies>io.grpc:grpc-protobuf-lite,org.hamcrest:hamcrest,org.hamcrest:hamcrest-core,com.google.errorprone:error_prone_annotations,org.openjdk.jmh:jmh-generator-annprocess,com.google.api.grpc:grpc-google-cloud-spanner-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-instance-v1,com.google.api.grpc:grpc-google-cloud-spanner-admin-database-v1,javax.annotation:javax.annotation-api,io.opencensus:opencensus-impl</ignoredDependencies>
</configuration>
</plugin>
</plugins>
Expand Down Expand Up @@ -179,6 +179,11 @@
<groupId>io.opencensus</groupId>
<artifactId>opencensus-contrib-grpc-util</artifactId>
</dependency>
<dependency>
<groupId>io.opencensus</groupId>
<artifactId>opencensus-impl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-oauth2-http</artifactId>
Expand Down
@@ -0,0 +1,129 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spanner.spi.v1;

import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.*;

import com.google.cloud.spanner.DatabaseId;
import io.grpc.*;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

import io.grpc.ForwardingClientCall.SimpleForwardingClientCall;
import io.grpc.ForwardingClientCallListener.SimpleForwardingClientCallListener;
import io.opencensus.stats.*;
import io.opencensus.tags.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Intercepts all gRPC calls to extract server-timing header. Captures GFE Latency and GFE Header
* Missing count metrics.
*/
class HeaderInterceptor implements ClientInterceptor {

private static final Metadata.Key<String> SERVER_TIMING_HEADER_KEY =
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER);
private static final Pattern SERVER_TIMING_HEADER_PATTERN = Pattern.compile(".*dur=(?<dur>\\d+)");
private static final Metadata.Key<String> GOOGLE_CLOUD_RESOURCE_PREFIX_KEY =
Metadata.Key.of("google-cloud-resource-prefix", Metadata.ASCII_STRING_MARSHALLER);
private static final Pattern GOOGLE_CLOUD_RESOURCE_PREFIX_PATTERN =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need this?

Pattern.compile(
".*projects/(?<project>\\w\\p{ASCII}+)/instances/(?<instance>\\w\\p{ASCII}+)/databases/(?<database>\\w\\p{ASCII}+)");

// Get the global singleton Tagger object.
private static final Tagger tagger = Tags.getTagger();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use snake upper case for constants (static final): https://google.github.io/styleguide/javaguide.html#s5.2.4-constant-names


private static final Logger logger = Logger.getLogger(HeaderInterceptor.class.getName());
private static final Level level = Level.INFO;

HeaderInterceptor() {}

@Override
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
return new SimpleForwardingClientCall<ReqT, RespT>(next.newCall(method, callOptions)) {
@Override
public void start(Listener<RespT> responseListener, Metadata headers) {
TagContext tagContext = getTagContext(headers, method.getFullMethodName());
super.start(
new SimpleForwardingClientCallListener<RespT>(responseListener) {
@Override
public void onHeaders(Metadata metadata) {
processHeader(metadata, tagContext);
super.onHeaders(metadata);
}
},
headers);
}
};
}

private void processHeader(Metadata metadata, TagContext tagContext) {
MeasureMap measureMap = STATS_RECORDER.newMeasureMap();
if (metadata.get(SERVER_TIMING_HEADER_KEY) != null) {
String serverTiming = metadata.get(SERVER_TIMING_HEADER_KEY);
Matcher matcher = SERVER_TIMING_HEADER_PATTERN.matcher(serverTiming);
if (matcher.find()) {
try {
long latency = Long.parseLong(matcher.group("dur"));
measureMap.put(SPANNER_GFE_LATENCY, latency).record(tagContext);
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 0L).record(tagContext);
} catch (NumberFormatException e) {
logger.log(level, "invalid server-timing object in header");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add the matcher.group("dur") in the log message as well

}
}
} else {
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 1L).record(tagContext);
}
}

private TagContext getTagContext(
String method, String projectId, String instanceId, String databaseId) {
return tagger
.currentBuilder()
.putLocal(SpannerRpcViews.PROJECT_ID, TagValue.create(projectId))
.putLocal(INSTANCE_ID, TagValue.create(instanceId))
.putLocal(DATABASE_ID, TagValue.create(databaseId))
.putLocal(SpannerRpcViews.METHOD, TagValue.create(method))
.build();
}

private TagContext getTagContext(String method) {
return tagger
.currentBuilder()
.putLocal(PROJECT_ID, TagValue.create("undefined-project"))
.putLocal(INSTANCE_ID, TagValue.create("undefined-instance"))
.putLocal(DATABASE_ID, TagValue.create("undefined-database"))
.putLocal(SpannerRpcViews.METHOD, TagValue.create(method))
.build();
}

private TagContext getTagContext(Metadata headers, String method) {
if (headers.get(GOOGLE_CLOUD_RESOURCE_PREFIX_KEY) != null) {
String googleResourcePrefix = headers.get(GOOGLE_CLOUD_RESOURCE_PREFIX_KEY);
try {
DatabaseId database = DatabaseId.of(googleResourcePrefix);
String databaseId = database.getDatabase();
String instanceId = database.getInstanceId().getInstance();
String projectId = database.getInstanceId().getProject();
return getTagContext(method, projectId, instanceId, databaseId);
} catch (IllegalArgumentException e) {
logger.log(level, e.toString());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you log the whole exception with a message as well?

Maybe logger.log(level, "Error parsing google cloud resource header", e)

}
}
return getTagContext(method);
}
}
Expand Up @@ -33,7 +33,8 @@ public class SpannerInterceptorProvider implements GrpcInterceptorProvider {
private static final List<ClientInterceptor> defaultInterceptors =
ImmutableList.of(
new SpannerErrorInterceptor(),
new LoggingInterceptor(Logger.getLogger(GapicSpannerRpc.class.getName()), Level.FINER));
new LoggingInterceptor(Logger.getLogger(GapicSpannerRpc.class.getName()), Level.FINER),
new HeaderInterceptor());

private final List<ClientInterceptor> clientInterceptors;

Expand Down
@@ -0,0 +1,121 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.spanner.spi.v1;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import io.opencensus.stats.*;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't use * imports

import io.opencensus.stats.Aggregation.Distribution;
import io.opencensus.stats.Aggregation.Sum;
import io.opencensus.stats.Measure.MeasureLong;
import io.opencensus.tags.TagKey;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

@VisibleForTesting
public class SpannerRpcViews {

/** Unit to represent milliseconds. */
private static final String MILLISECOND = "ms";
/** Unit to represent counts. */
private static final String COUNT = "1";

/** TagKeys */
public static final TagKey METHOD = TagKey.create("method");

public static final TagKey PROJECT_ID = TagKey.create("project_id");
public static final TagKey INSTANCE_ID = TagKey.create("instance_id");
public static final TagKey DATABASE_ID = TagKey.create("database");

public static final StatsRecorder STATS_RECORDER = Stats.getStatsRecorder();
/** GFE t4t7 latency extracted from server-timing header. */
public static final MeasureLong SPANNER_GFE_LATENCY =
MeasureLong.create(
"cloud.google.com/java/spanner/gfe_latency",
"Latency between Google's network receiving an RPC and reading back the first byte of the response",
MILLISECOND);
/** Number of responses without the server-timing header. */
public static final MeasureLong SPANNER_GFE_HEADER_MISSING_COUNT =
MeasureLong.create(
"cloud.google.com/java/spanner/gfe_header_missing_count",
"Number of RPC responses received without the server-timing header, most likely means that the RPC never reached Google's network",
COUNT);

static final List<Double> RPC_MILLIS_BUCKET_BOUNDARIES =
Collections.unmodifiableList(
Arrays.asList(
0.0, 0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 13.0,
16.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0, 200.0, 250.0,
300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0,
100000.0));
static final Aggregation AGGREGATION_WITH_MILLIS_HISTOGRAM =
Distribution.create(BucketBoundaries.create(RPC_MILLIS_BUCKET_BOUNDARIES));
static final View SPANNER_GFE_LATENCY_VIEW =
View.create(
View.Name.create("cloud.google.com/java/spanner/gfe_latency"),
"Latency between Google's network receiving an RPC and reading back the first byte of the response",
SPANNER_GFE_LATENCY,
AGGREGATION_WITH_MILLIS_HISTOGRAM,
ImmutableList.of(METHOD, PROJECT_ID, INSTANCE_ID, DATABASE_ID));

private static final Aggregation SUM = Sum.create();
static final View SPANNER_GFE_HEADER_MISSING_COUNT_VIEW =
View.create(
View.Name.create("cloud.google.com/java/spanner/gfe_header_missing_count"),
"Number of RPC responses received without the server-timing header, most likely means that the RPC never reached Google's network",
SPANNER_GFE_HEADER_MISSING_COUNT,
SUM,
ImmutableList.of(METHOD, PROJECT_ID, INSTANCE_ID, DATABASE_ID));

public static ViewManager viewManager = Stats.getViewManager();

/**
* Register views for GFE metrics, including gfe_latency and gfe_header_missing_count. gfe_latency
* measures the latency between Google's network receives an RPC and reads back the first byte of
* the response. gfe_header_missing_count is a counter of the number of RPC responses without a
* server-timing header.
*/
@VisibleForTesting
public static void registerGfeLatencyAndHeaderMissingCountViews() {
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW);
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW);
}

@VisibleForTesting
public static void registerGfeLatencyAndHeaderMissingCountViews(ViewManager viewManager) {
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW);
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW);
}

/**
* Register GFE Latency view. gfe_latency measures the latency between Google's network receives
* an RPC and reads back the first byte of the response.
*/
@VisibleForTesting
public static void registerGfeLatencyView() {
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW);
}

/**
* Register GFE Header Missing Count view. gfe_header_missing_count is a counter of the number of
* RPC responses without a server-timing header.
*/
@VisibleForTesting
public static void registerGfeHeaderMissingCountView() {
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW);
}
}