Skip to content
This repository has been archived by the owner on Sep 26, 2023. It is now read-only.

Commit

Permalink
fix: truncate RPC timeouts to time remaining in totalTimeout (#1191)
Browse files Browse the repository at this point in the history
* fix: truncate RPC timeouts to time remaining

* comment time left <= 0 rpcTimeout behavior

* comment shouldRetry polling relationship

* add more commentary to polling test for clarity
  • Loading branch information
noahdietz committed Sep 28, 2020
1 parent f3c1d3e commit 1d0c940
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 6 deletions.
Expand Up @@ -100,19 +100,37 @@ public TimedAttemptSettings createNextAttempt(TimedAttemptSettings prevSettings)
(long) (settings.getRetryDelayMultiplier() * prevSettings.getRetryDelay().toMillis());
newRetryDelay = Math.min(newRetryDelay, settings.getMaxRetryDelay().toMillis());
}
Duration randomDelay = Duration.ofMillis(nextRandomLong(newRetryDelay));

// The rpc timeout is determined as follows:
// attempt #0 - use the initialRpcTimeout;
// attempt #1+ - use the calculated value.
// attempt #1+ - use the calculated value, or the time remaining in totalTimeout if the
// calculated value would exceed the totalTimeout.
long newRpcTimeout =
(long) (settings.getRpcTimeoutMultiplier() * prevSettings.getRpcTimeout().toMillis());
newRpcTimeout = Math.min(newRpcTimeout, settings.getMaxRpcTimeout().toMillis());

// The totalTimeout could be zero if a callable is only using maxAttempts to limit retries.
// If set, calculate time remaining in the totalTimeout since the start, taking into account the
// next attempt's delay, in order to truncate the RPC timeout should it exceed the totalTimeout.
if (!settings.getTotalTimeout().isZero()) {
Duration timeElapsed =
Duration.ofNanos(clock.nanoTime())
.minus(Duration.ofNanos(prevSettings.getFirstAttemptStartTimeNanos()));
Duration timeLeft = globalSettings.getTotalTimeout().minus(timeElapsed).minus(randomDelay);

// If timeLeft at this point is < 0, the shouldRetry logic will prevent
// the attempt from being made as it would exceed the totalTimeout. A negative RPC timeout
// will result in a deadline in the past, which should will always fail prior to making a
// network call.
newRpcTimeout = Math.min(newRpcTimeout, timeLeft.toMillis());
}

return TimedAttemptSettings.newBuilder()
.setGlobalSettings(prevSettings.getGlobalSettings())
.setRetryDelay(Duration.ofMillis(newRetryDelay))
.setRpcTimeout(Duration.ofMillis(newRpcTimeout))
.setRandomizedRetryDelay(Duration.ofMillis(nextRandomLong(newRetryDelay)))
.setRandomizedRetryDelay(randomDelay)
.setAttemptCount(prevSettings.getAttemptCount() + 1)
.setOverallAttemptCount(prevSettings.getOverallAttemptCount() + 1)
.setFirstAttemptStartTimeNanos(prevSettings.getFirstAttemptStartTimeNanos())
Expand Down Expand Up @@ -144,7 +162,16 @@ public boolean shouldRetry(TimedAttemptSettings nextAttemptSettings) {
- nextAttemptSettings.getFirstAttemptStartTimeNanos()
+ nextAttemptSettings.getRandomizedRetryDelay().toNanos();

// If totalTimeout limit is defined, check that it hasn't been crossed
// If totalTimeout limit is defined, check that it hasn't been crossed.
//
// Note: if the potential time spent is exactly equal to the totalTimeout,
// the attempt will still be allowed. This might not be desired, but if we
// enforce it, it could have potentially negative side effects on LRO polling.
// Specifically, if a polling retry attempt is denied, the LRO is canceled, and
// if a polling retry attempt is denied because its delay would *reach* the
// totalTimeout, the LRO would be canceled prematurely. The problem here is that
// totalTimeout doubles as the polling threshold and also the time limit for an
// operation to finish.
if (totalTimeout > 0 && totalTimeSpentNanos > totalTimeout) {
return false;
}
Expand Down
Expand Up @@ -29,6 +29,7 @@
*/
package com.google.api.gax.retrying;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
Expand Down Expand Up @@ -89,6 +90,25 @@ public void testCreateNextAttempt() {
assertEquals(Duration.ofMillis(4L), thirdAttempt.getRpcTimeout());
}

@Test
public void testTruncateToTotalTimeout() {
RetrySettings timeoutSettings =
retrySettings
.toBuilder()
.setInitialRpcTimeout(Duration.ofSeconds(4L))
.setMaxRpcTimeout(Duration.ofSeconds(4L))
.setTotalTimeout(Duration.ofSeconds(4L))
.build();
ExponentialRetryAlgorithm timeoutAlg = new ExponentialRetryAlgorithm(timeoutSettings, clock);

TimedAttemptSettings firstAttempt = timeoutAlg.createFirstAttempt();
TimedAttemptSettings secondAttempt = timeoutAlg.createNextAttempt(firstAttempt);
assertThat(firstAttempt.getRpcTimeout()).isGreaterThan(secondAttempt.getRpcTimeout());

TimedAttemptSettings thirdAttempt = timeoutAlg.createNextAttempt(secondAttempt);
assertThat(secondAttempt.getRpcTimeout()).isGreaterThan(thirdAttempt.getRpcTimeout());
}

@Test
public void testShouldRetryTrue() {
TimedAttemptSettings attempt = algorithm.createFirstAttempt();
Expand Down
Expand Up @@ -94,11 +94,14 @@ public class OperationCallableImplTest {
.setInitialRetryDelay(Duration.ofMillis(1L))
.setRetryDelayMultiplier(1)
.setMaxRetryDelay(Duration.ofMillis(1L))
.setInitialRpcTimeout(Duration.ZERO) // supposed to be ignored
.setInitialRpcTimeout(
Duration.ZERO) // supposed to be ignored, but are not actually, so we set to zero
.setMaxAttempts(0)
.setJittered(false)
.setRpcTimeoutMultiplier(1) // supposed to be ignored
.setMaxRpcTimeout(Duration.ZERO) // supposed to be ignored
.setRpcTimeoutMultiplier(
1) // supposed to be ignored, but are not actually, so we set to one
.setMaxRpcTimeout(
Duration.ZERO) // supposed to be ignored, but are not actually, so we set to zero
.setTotalTimeout(Duration.ofMillis(5L))
.build();

Expand Down Expand Up @@ -475,9 +478,16 @@ public void testFutureCallPollRPCTimeout() throws Exception {
OperationTimedPollAlgorithm.create(
FAST_RECHECKING_SETTINGS
.toBuilder()
// Update the polling algorithm to set per-RPC timeouts instead of the default zero.
//
// This is non-standard, as these fields have been documented as "should be ignored"
// for LRO polling. They are not actually ignored in code, so they changing them
// here has an actual affect. This test verifies that they work as such, but in
// practice generated clients set the RPC timeouts to 0 to be ignored.
.setInitialRpcTimeout(Duration.ofMillis(100))
.setMaxRpcTimeout(Duration.ofSeconds(1))
.setRpcTimeoutMultiplier(2)
.setTotalTimeout(Duration.ofSeconds(5L))
.build(),
clock);
callSettings = callSettings.toBuilder().setPollingAlgorithm(pollingAlgorithm).build();
Expand Down

0 comments on commit 1d0c940

Please sign in to comment.