Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: improve error detection and reporting for BlobWriteChannel retry state #846

Merged
merged 1 commit into from Jun 1, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -26,6 +26,7 @@
import com.google.cloud.WriteChannel;
import com.google.cloud.storage.spi.v1.StorageRpc;
import com.google.common.collect.Maps;
import java.math.BigInteger;
import java.net.URL;
import java.util.Map;
import java.util.concurrent.Callable;
Expand Down Expand Up @@ -83,14 +84,52 @@ private StorageObject getRemoteStorageObject() {
.get(getEntity().toPb(), Maps.newEnumMap(StorageRpc.Option.class));
}

private StorageException unrecoverableState(
int chunkOffset, int chunkLength, long localPosition, long remotePosition, boolean last) {
private static StorageException unrecoverableState(
String uploadId,
int chunkOffset,
int chunkLength,
long localPosition,
long remotePosition,
boolean last) {
return unrecoverableState(
uploadId,
chunkOffset,
chunkLength,
localPosition,
remotePosition,
last,
"Unable to recover in upload.\nThis may be a symptom of multiple clients uploading to the same upload session.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think multiple clients uploading to the same session is possible? The session should be created by the client and then only used by that client AFAIK.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@frankyn Do you have context on why this was the original message? (I refactored it to a parameter so I could reuse the details formatting code).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible if the upload URI is shared between clients. GCS API supports it, however it's very unlikely it's a possible case.

}

private static StorageException errorResolvingMetadataLastChunk(
String uploadId,
int chunkOffset,
int chunkLength,
long localPosition,
long remotePosition,
boolean last) {
return unrecoverableState(
uploadId,
chunkOffset,
chunkLength,
localPosition,
remotePosition,
last,
"Unable to load object metadata to determine if last chunk was successfully written");
}

private static StorageException unrecoverableState(
String uploadId,
int chunkOffset,
int chunkLength,
long localPosition,
long remotePosition,
boolean last,
String message) {
StringBuilder sb = new StringBuilder();
sb.append("Unable to recover in upload.\n");
sb.append(
"This may be a symptom of multiple clients uploading to the same upload session.\n\n");
sb.append(message).append("\n\n");
sb.append("For debugging purposes:\n");
sb.append("uploadId: ").append(getUploadId()).append('\n');
sb.append("uploadId: ").append(uploadId).append('\n');
sb.append("chunkOffset: ").append(chunkOffset).append('\n');
sb.append("chunkLength: ").append(chunkLength).append('\n');
sb.append("localOffset: ").append(localPosition).append('\n');
Expand Down Expand Up @@ -162,7 +201,7 @@ public void run() {
// Get remote offset from API
final long localPosition = getPosition();
// For each request it should be possible to retry from its location in this code
final long remotePosition = isRetrying() ? getRemotePosition() : getPosition();
final long remotePosition = isRetrying() ? getRemotePosition() : localPosition;
final int chunkOffset = (int) (remotePosition - localPosition);
final int chunkLength = length - chunkOffset;
final boolean uploadAlreadyComplete = remotePosition == -1;
Expand All @@ -176,10 +215,38 @@ public void run() {
if (storageObject == null) {
storageObject = getRemoteStorageObject();
}
// the following checks are defined here explicitly to provide a more
// informative if either storageObject is unable to be resolved or it's size is
// unable to be determined. This scenario is a very rare case of failure that
// can arise when packets are lost.
if (storageObject == null) {
throw errorResolvingMetadataLastChunk(
getUploadId(),
chunkOffset,
chunkLength,
localPosition,
remotePosition,
lastChunk);
}
// Verify that with the final chunk we match the blob length
if (storageObject.getSize().longValue() != getPosition() + length) {
BigInteger size = storageObject.getSize();
if (size == null) {
throw errorResolvingMetadataLastChunk(
getUploadId(),
chunkOffset,
chunkLength,
localPosition,
remotePosition,
lastChunk);
}
if (size.longValue() != getPosition() + length) {
throw unrecoverableState(
chunkOffset, chunkLength, localPosition, remotePosition, lastChunk);
getUploadId(),
chunkOffset,
chunkLength,
localPosition,
remotePosition,
lastChunk);
}
retrying = false;
} else if (uploadAlreadyComplete && !lastChunk && !checkingForLastChunk) {
Expand All @@ -201,7 +268,12 @@ public void run() {
} else {
// Case 4 && Case 8 && Case 9
throw unrecoverableState(
chunkOffset, chunkLength, localPosition, remotePosition, lastChunk);
getUploadId(),
chunkOffset,
chunkLength,
localPosition,
remotePosition,
lastChunk);
}
}
}),
Expand Down