New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: GFE Latency #1473
feat: GFE Latency #1473
Changes from 6 commits
393951f
3b1fcdd
d773f71
ee3e9da
f166a52
de6cc25
b112c32
412855d
eb4c17c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
/* | ||
* Copyright 2021 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.google.cloud.spanner.spi.v1; | ||
|
||
import static com.google.cloud.spanner.spi.v1.SpannerRpcViews.*; | ||
|
||
import com.google.cloud.spanner.DatabaseId; | ||
import io.grpc.*; | ||
import io.grpc.ForwardingClientCall.SimpleForwardingClientCall; | ||
import io.grpc.ForwardingClientCallListener.SimpleForwardingClientCallListener; | ||
import io.opencensus.stats.*; | ||
import io.opencensus.tags.*; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* Intercepts all gRPC calls to extract server-timing header. Captures GFE Latency and GFE Header | ||
* Missing count metrics. | ||
*/ | ||
class HeaderInterceptor implements ClientInterceptor { | ||
|
||
private static final Metadata.Key<String> SERVER_TIMING_HEADER_KEY = | ||
Metadata.Key.of("server-timing", Metadata.ASCII_STRING_MARSHALLER); | ||
private static final Pattern SERVER_TIMING_HEADER_PATTERN = Pattern.compile(".*dur=(?<dur>\\d+)"); | ||
private static final Metadata.Key<String> GOOGLE_CLOUD_RESOURCE_PREFIX_KEY = | ||
Metadata.Key.of("google-cloud-resource-prefix", Metadata.ASCII_STRING_MARSHALLER); | ||
private static final Pattern GOOGLE_CLOUD_RESOURCE_PREFIX_PATTERN = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we still need this? |
||
Pattern.compile( | ||
".*projects/(?<project>\\w\\p{ASCII}+)/instances/(?<instance>\\w\\p{ASCII}+)/databases/(?<database>\\w\\p{ASCII}+)"); | ||
|
||
// Get the global singleton Tagger object. | ||
private static final Tagger tagger = Tags.getTagger(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use snake upper case for constants ( |
||
|
||
private static final Logger logger = Logger.getLogger(HeaderInterceptor.class.getName()); | ||
private static final Level level = Level.INFO; | ||
|
||
HeaderInterceptor() {} | ||
|
||
@Override | ||
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall( | ||
MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) { | ||
return new SimpleForwardingClientCall<ReqT, RespT>(next.newCall(method, callOptions)) { | ||
@Override | ||
public void start(Listener<RespT> responseListener, Metadata headers) { | ||
TagContext tagContext = getTagContext(headers, method.getFullMethodName()); | ||
super.start( | ||
new SimpleForwardingClientCallListener<RespT>(responseListener) { | ||
@Override | ||
public void onHeaders(Metadata metadata) { | ||
processHeader(metadata, tagContext); | ||
super.onHeaders(metadata); | ||
} | ||
}, | ||
headers); | ||
} | ||
}; | ||
} | ||
|
||
private void processHeader(Metadata metadata, TagContext tagContext) { | ||
MeasureMap measureMap = STATS_RECORDER.newMeasureMap(); | ||
if (metadata.get(SERVER_TIMING_HEADER_KEY) != null) { | ||
String serverTiming = metadata.get(SERVER_TIMING_HEADER_KEY); | ||
Matcher matcher = SERVER_TIMING_HEADER_PATTERN.matcher(serverTiming); | ||
if (matcher.find()) { | ||
try { | ||
long latency = Long.parseLong(matcher.group("dur")); | ||
measureMap.put(SPANNER_GFE_LATENCY, latency).record(tagContext); | ||
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 0L).record(tagContext); | ||
} catch (NumberFormatException e) { | ||
logger.log(level, "invalid server-timing object in header"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add the matcher.group("dur") in the log message as well |
||
} | ||
} | ||
} else { | ||
measureMap.put(SPANNER_GFE_HEADER_MISSING_COUNT, 1L).record(tagContext); | ||
} | ||
} | ||
|
||
private TagContext getTagContext( | ||
String method, String projectId, String instanceId, String databaseId) { | ||
return tagger | ||
.currentBuilder() | ||
.putLocal(SpannerRpcViews.PROJECT_ID, TagValue.create(projectId)) | ||
.putLocal(INSTANCE_ID, TagValue.create(instanceId)) | ||
.putLocal(DATABASE_ID, TagValue.create(databaseId)) | ||
.putLocal(SpannerRpcViews.METHOD, TagValue.create(method)) | ||
.build(); | ||
} | ||
|
||
private TagContext getTagContext(String method) { | ||
return tagger | ||
.currentBuilder() | ||
.putLocal(PROJECT_ID, TagValue.create("undefined-project")) | ||
.putLocal(INSTANCE_ID, TagValue.create("undefined-instance")) | ||
.putLocal(DATABASE_ID, TagValue.create("undefined-database")) | ||
.putLocal(SpannerRpcViews.METHOD, TagValue.create(method)) | ||
.build(); | ||
} | ||
|
||
private TagContext getTagContext(Metadata headers, String method) { | ||
if (headers.get(GOOGLE_CLOUD_RESOURCE_PREFIX_KEY) != null) { | ||
String googleResourcePrefix = headers.get(GOOGLE_CLOUD_RESOURCE_PREFIX_KEY); | ||
try { | ||
DatabaseId database = DatabaseId.of(googleResourcePrefix); | ||
String databaseId = database.getDatabase(); | ||
String instanceId = database.getInstanceId().getInstance(); | ||
String projectId = database.getInstanceId().getProject(); | ||
return getTagContext(method, projectId, instanceId, databaseId); | ||
} catch (IllegalArgumentException e) { | ||
logger.log(level, e.toString()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you log the whole exception with a message as well? Maybe |
||
} | ||
} | ||
return getTagContext(method); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
/* | ||
* Copyright 2021 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.google.cloud.spanner.spi.v1; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import com.google.common.collect.ImmutableList; | ||
import io.opencensus.stats.*; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please don't use |
||
import io.opencensus.stats.Aggregation.Distribution; | ||
import io.opencensus.stats.Aggregation.Sum; | ||
import io.opencensus.stats.Measure.MeasureLong; | ||
import io.opencensus.tags.TagKey; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
@VisibleForTesting | ||
public class SpannerRpcViews { | ||
|
||
/** Unit to represent milliseconds. */ | ||
private static final String MILLISECOND = "ms"; | ||
/** Unit to represent counts. */ | ||
private static final String COUNT = "1"; | ||
|
||
/** TagKeys */ | ||
public static final TagKey METHOD = TagKey.create("method"); | ||
|
||
public static final TagKey PROJECT_ID = TagKey.create("project_id"); | ||
public static final TagKey INSTANCE_ID = TagKey.create("instance_id"); | ||
public static final TagKey DATABASE_ID = TagKey.create("database"); | ||
|
||
public static final StatsRecorder STATS_RECORDER = Stats.getStatsRecorder(); | ||
/** GFE t4t7 latency extracted from server-timing header. */ | ||
public static final MeasureLong SPANNER_GFE_LATENCY = | ||
MeasureLong.create( | ||
"cloud.google.com/java/spanner/gfe_latency", | ||
"Latency between Google's network receiving an RPC and reading back the first byte of the response", | ||
MILLISECOND); | ||
/** Number of responses without the server-timing header. */ | ||
public static final MeasureLong SPANNER_GFE_HEADER_MISSING_COUNT = | ||
MeasureLong.create( | ||
"cloud.google.com/java/spanner/gfe_header_missing_count", | ||
"Number of RPC responses received without the server-timing header, most likely means that the RPC never reached Google's network", | ||
COUNT); | ||
|
||
static final List<Double> RPC_MILLIS_BUCKET_BOUNDARIES = | ||
Collections.unmodifiableList( | ||
Arrays.asList( | ||
0.0, 0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 13.0, | ||
16.0, 20.0, 25.0, 30.0, 40.0, 50.0, 65.0, 80.0, 100.0, 130.0, 160.0, 200.0, 250.0, | ||
300.0, 400.0, 500.0, 650.0, 800.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0, | ||
100000.0)); | ||
static final Aggregation AGGREGATION_WITH_MILLIS_HISTOGRAM = | ||
Distribution.create(BucketBoundaries.create(RPC_MILLIS_BUCKET_BOUNDARIES)); | ||
static final View SPANNER_GFE_LATENCY_VIEW = | ||
View.create( | ||
View.Name.create("cloud.google.com/java/spanner/gfe_latency"), | ||
"Latency between Google's network receiving an RPC and reading back the first byte of the response", | ||
SPANNER_GFE_LATENCY, | ||
AGGREGATION_WITH_MILLIS_HISTOGRAM, | ||
ImmutableList.of(METHOD, PROJECT_ID, INSTANCE_ID, DATABASE_ID)); | ||
|
||
private static final Aggregation SUM = Sum.create(); | ||
static final View SPANNER_GFE_HEADER_MISSING_COUNT_VIEW = | ||
View.create( | ||
View.Name.create("cloud.google.com/java/spanner/gfe_header_missing_count"), | ||
"Number of RPC responses received without the server-timing header, most likely means that the RPC never reached Google's network", | ||
SPANNER_GFE_HEADER_MISSING_COUNT, | ||
SUM, | ||
ImmutableList.of(METHOD, PROJECT_ID, INSTANCE_ID, DATABASE_ID)); | ||
|
||
public static ViewManager viewManager = Stats.getViewManager(); | ||
|
||
/** | ||
* Register views for GFE metrics, including gfe_latency and gfe_header_missing_count. gfe_latency | ||
* measures the latency between Google's network receives an RPC and reads back the first byte of | ||
* the response. gfe_header_missing_count is a counter of the number of RPC responses without a | ||
* server-timing header. | ||
*/ | ||
@VisibleForTesting | ||
public static void registerGfeLatencyAndHeaderMissingCountViews() { | ||
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW); | ||
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW); | ||
} | ||
|
||
@VisibleForTesting | ||
public static void registerGfeLatencyAndHeaderMissingCountViews(ViewManager viewManager) { | ||
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW); | ||
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW); | ||
} | ||
|
||
/** | ||
* Register GFE Latency view. gfe_latency measures the latency between Google's network receives | ||
* an RPC and reads back the first byte of the response. | ||
*/ | ||
@VisibleForTesting | ||
public static void registerGfeLatencyView() { | ||
viewManager.registerView(SPANNER_GFE_LATENCY_VIEW); | ||
} | ||
|
||
/** | ||
* Register GFE Header Missing Count view. gfe_header_missing_count is a counter of the number of | ||
* RPC responses without a server-timing header. | ||
*/ | ||
@VisibleForTesting | ||
public static void registerGfeHeaderMissingCountView() { | ||
viewManager.registerView(SPANNER_GFE_HEADER_MISSING_COUNT_VIEW); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please don't use
*
imports (https://google.github.io/styleguide/javaguide.html#s3.3.1-wildcard-imports)