Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

retry on get_object_size #2836

Merged
merged 1 commit into from Apr 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 36 additions & 2 deletions deeplake/core/storage/s3.py
Expand Up @@ -635,11 +635,45 @@ def get_presigned_url(self, key, full=False):
self._presigned_urls[path] = (url, time.time())
return url

def _get_object_size(self, path: str) -> int:
    """Return the ``content_length`` of the object at ``path`` in ``self.bucket``.

    ``path`` is used as the object key exactly as given; no prefix is added
    and no exception is translated here.
    """
    return self.resource.Object(self.bucket, path).content_length

def get_object_size(self, path: str) -> int:
    """Return the size of the object stored under ``path``, retrying on failure.

    ``path`` is appended to ``self.path`` to build the full key, which is then
    looked up through ``self._get_object_size``.

    Args:
        path (str): Key of the object, relative to ``self.path``.

    Returns:
        int: Whatever ``self._get_object_size`` returns for the full key.

    Raises:
        KeyError: If the service reports that the key does not exist.
        S3GetAccessError: If botocore raises ``NoCredentialsError``.
        S3GetError: For any other failure, including a connection error that
            persists after ``self.num_tries`` retries.
    """
    self._check_update_creds()
    path = "".join((self.path, path))

    # Every lookup goes through the guarded call below; an unguarded lookup
    # placed before the ``try`` would bypass the retry and error translation.
    try:
        return self._get_object_size(path)
    except botocore.exceptions.ClientError as err:
        code = err.response["Error"]["Code"]
        # boto3 resolves ``content_length`` with a HEAD request. A HEAD
        # response has no body, so a missing key is reported with the bare
        # HTTP status "404" instead of "NoSuchKey"; accept both.
        if code in ("NoSuchKey", "404"):
            raise KeyError(err) from err
        if code == "InvalidAccessKeyId":
            new_error_cls: Type[S3GetError] = S3GetAccessError
        else:
            new_error_cls = S3GetError
        # NOTE(review): the manager is handed the error class to use;
        # presumably it reloads credentials and raises that class if the
        # second attempt fails too -- confirm against its definition.
        with S3ResetReloadCredentialsManager(self, new_error_cls):
            return self._get_object_size(path)
    except CONNECTION_ERRORS as err:
        tries = self.num_tries
        retry_wait = 0
        for i in range(1, tries + 1):
            always_warn(f"Encountered connection error, retry {i} out of {tries}")
            retry_wait = self._retry_wait_and_extend(retry_wait, err)

            try:
                ret = self._get_object_size(path)
                always_warn(
                    f"Connection re-established after {i} {['retries', 'retry'][i==1]}."
                )
                return ret
            except Exception:
                # Deliberate best-effort: a failed attempt just moves on to
                # the next retry; the original error is raised once all
                # attempts are used up.
                pass
        raise S3GetError(err) from err
    except botocore.exceptions.NoCredentialsError as err:
        raise S3GetAccessError from err
    except Exception as err:
        raise S3GetError(err) from err

def get_object_from_full_url(self, url: str):
root = url.replace("s3://", "")
Expand Down