diff --git a/gax/src/main/java/com/google/api/gax/retrying/ExponentialRetryAlgorithm.java b/gax/src/main/java/com/google/api/gax/retrying/ExponentialRetryAlgorithm.java index ae53ade27..ecc91bb9e 100644 --- a/gax/src/main/java/com/google/api/gax/retrying/ExponentialRetryAlgorithm.java +++ b/gax/src/main/java/com/google/api/gax/retrying/ExponentialRetryAlgorithm.java @@ -100,19 +100,37 @@ public TimedAttemptSettings createNextAttempt(TimedAttemptSettings prevSettings) (long) (settings.getRetryDelayMultiplier() * prevSettings.getRetryDelay().toMillis()); newRetryDelay = Math.min(newRetryDelay, settings.getMaxRetryDelay().toMillis()); } + Duration randomDelay = Duration.ofMillis(nextRandomLong(newRetryDelay)); // The rpc timeout is determined as follows: // attempt #0 - use the initialRpcTimeout; - // attempt #1+ - use the calculated value. + // attempt #1+ - use the calculated value, or the time remaining in totalTimeout if the + // calculated value would exceed the totalTimeout. long newRpcTimeout = (long) (settings.getRpcTimeoutMultiplier() * prevSettings.getRpcTimeout().toMillis()); newRpcTimeout = Math.min(newRpcTimeout, settings.getMaxRpcTimeout().toMillis()); + // The totalTimeout could be zero if a callable is only using maxAttempts to limit retries. + // If set, calculate time remaining in the totalTimeout since the start, taking into account the + // next attempt's delay, in order to truncate the RPC timeout should it exceed the totalTimeout. + if (!settings.getTotalTimeout().isZero()) { + Duration timeElapsed = + Duration.ofNanos(clock.nanoTime()) + .minus(Duration.ofNanos(prevSettings.getFirstAttemptStartTimeNanos())); + Duration timeLeft = globalSettings.getTotalTimeout().minus(timeElapsed).minus(randomDelay); + + // If timeLeft at this point is < 0, the shouldRetry logic will prevent + // the attempt from being made as it would exceed the totalTimeout. A negative RPC timeout + // will result in a deadline in the past, which should will always fail prior to making a + // network call. + newRpcTimeout = Math.min(newRpcTimeout, timeLeft.toMillis()); + } + return TimedAttemptSettings.newBuilder() .setGlobalSettings(prevSettings.getGlobalSettings()) .setRetryDelay(Duration.ofMillis(newRetryDelay)) .setRpcTimeout(Duration.ofMillis(newRpcTimeout)) - .setRandomizedRetryDelay(Duration.ofMillis(nextRandomLong(newRetryDelay))) + .setRandomizedRetryDelay(randomDelay) .setAttemptCount(prevSettings.getAttemptCount() + 1) .setOverallAttemptCount(prevSettings.getOverallAttemptCount() + 1) .setFirstAttemptStartTimeNanos(prevSettings.getFirstAttemptStartTimeNanos()) @@ -144,7 +162,16 @@ public boolean shouldRetry(TimedAttemptSettings nextAttemptSettings) { - nextAttemptSettings.getFirstAttemptStartTimeNanos() + nextAttemptSettings.getRandomizedRetryDelay().toNanos(); - // If totalTimeout limit is defined, check that it hasn't been crossed + // If totalTimeout limit is defined, check that it hasn't been crossed. + // + // Note: if the potential time spent is exactly equal to the totalTimeout, + // the attempt will still be allowed. This might not be desired, but if we + // enforce it, it could have potentially negative side effects on LRO polling. + // Specifically, if a polling retry attempt is denied, the LRO is canceled, and + // if a polling retry attempt is denied because its delay would *reach* the + // totalTimeout, the LRO would be canceled prematurely. The problem here is that + // totalTimeout doubles as the polling threshold and also the time limit for an + // operation to finish. if (totalTimeout > 0 && totalTimeSpentNanos > totalTimeout) { return false; } diff --git a/gax/src/test/java/com/google/api/gax/retrying/ExponentialRetryAlgorithmTest.java b/gax/src/test/java/com/google/api/gax/retrying/ExponentialRetryAlgorithmTest.java index 81d536ef5..da804a1b5 100644 --- a/gax/src/test/java/com/google/api/gax/retrying/ExponentialRetryAlgorithmTest.java +++ b/gax/src/test/java/com/google/api/gax/retrying/ExponentialRetryAlgorithmTest.java @@ -29,6 +29,7 @@ */ package com.google.api.gax.retrying; +import static com.google.common.truth.Truth.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -89,6 +90,25 @@ public void testCreateNextAttempt() { assertEquals(Duration.ofMillis(4L), thirdAttempt.getRpcTimeout()); } + @Test + public void testTruncateToTotalTimeout() { + RetrySettings timeoutSettings = + retrySettings + .toBuilder() + .setInitialRpcTimeout(Duration.ofSeconds(4L)) + .setMaxRpcTimeout(Duration.ofSeconds(4L)) + .setTotalTimeout(Duration.ofSeconds(4L)) + .build(); + ExponentialRetryAlgorithm timeoutAlg = new ExponentialRetryAlgorithm(timeoutSettings, clock); + + TimedAttemptSettings firstAttempt = timeoutAlg.createFirstAttempt(); + TimedAttemptSettings secondAttempt = timeoutAlg.createNextAttempt(firstAttempt); + assertThat(firstAttempt.getRpcTimeout()).isGreaterThan(secondAttempt.getRpcTimeout()); + + TimedAttemptSettings thirdAttempt = timeoutAlg.createNextAttempt(secondAttempt); + assertThat(secondAttempt.getRpcTimeout()).isGreaterThan(thirdAttempt.getRpcTimeout()); + } + @Test public void testShouldRetryTrue() { TimedAttemptSettings attempt = algorithm.createFirstAttempt(); diff --git a/gax/src/test/java/com/google/api/gax/rpc/OperationCallableImplTest.java b/gax/src/test/java/com/google/api/gax/rpc/OperationCallableImplTest.java index 40d58a99f..edcd4908a 100644 --- a/gax/src/test/java/com/google/api/gax/rpc/OperationCallableImplTest.java +++ b/gax/src/test/java/com/google/api/gax/rpc/OperationCallableImplTest.java @@ -94,11 +94,14 @@ public class OperationCallableImplTest { .setInitialRetryDelay(Duration.ofMillis(1L)) .setRetryDelayMultiplier(1) .setMaxRetryDelay(Duration.ofMillis(1L)) - .setInitialRpcTimeout(Duration.ZERO) // supposed to be ignored + .setInitialRpcTimeout( + Duration.ZERO) // supposed to be ignored, but are not actually, so we set to zero .setMaxAttempts(0) .setJittered(false) - .setRpcTimeoutMultiplier(1) // supposed to be ignored - .setMaxRpcTimeout(Duration.ZERO) // supposed to be ignored + .setRpcTimeoutMultiplier( + 1) // supposed to be ignored, but are not actually, so we set to one + .setMaxRpcTimeout( + Duration.ZERO) // supposed to be ignored, but are not actually, so we set to zero .setTotalTimeout(Duration.ofMillis(5L)) .build(); @@ -475,9 +478,16 @@ public void testFutureCallPollRPCTimeout() throws Exception { OperationTimedPollAlgorithm.create( FAST_RECHECKING_SETTINGS .toBuilder() + // Update the polling algorithm to set per-RPC timeouts instead of the default zero. + // + // This is non-standard, as these fields have been documented as "should be ignored" + // for LRO polling. They are not actually ignored in code, so they changing them + // here has an actual affect. This test verifies that they work as such, but in + // practice generated clients set the RPC timeouts to 0 to be ignored. .setInitialRpcTimeout(Duration.ofMillis(100)) .setMaxRpcTimeout(Duration.ofSeconds(1)) .setRpcTimeoutMultiplier(2) + .setTotalTimeout(Duration.ofSeconds(5L)) .build(), clock); callSettings = callSettings.toBuilder().setPollingAlgorithm(pollingAlgorithm).build();