Skip to content

Commit

Permalink
Merge pull request #2849 from activeloopai/s3_custom_config
Browse files Browse the repository at this point in the history
allow custom configs for s3
  • Loading branch information
activesoull committed May 9, 2024
2 parents a398a6f + 4364aca commit 2f22e5f
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
17 changes: 9 additions & 8 deletions deeplake/core/storage/s3.py
Expand Up @@ -6,7 +6,7 @@
import botocore # type: ignore
import posixpath
import ssl
from typing import Dict, Optional, Tuple, Type
from typing import Dict, Optional, Tuple, Type, Any
from datetime import datetime, timezone
from botocore.session import ComponentLocator
from deeplake.client.client import DeepLakeBackendClient
Expand Down Expand Up @@ -96,6 +96,7 @@ def __init__(
aws_region: Optional[str] = None,
profile_name: Optional[str] = None,
token: Optional[str] = None,
config: Optional[Any] = None,
**kwargs,
):
"""Initializes the S3Provider
Expand All @@ -118,6 +119,7 @@ def __init__(
profile_name (str, optional): Specifies the AWS profile name to use.
token (str, optional): Activeloop token, used for fetching credentials for Deep Lake datasets (if this is underlying storage for Deep Lake dataset).
This is optional, tokens are normally autogenerated.
config (Any, Optional): s3 client configuration provided by the user. Defaults to None.
**kwargs: Additional arguments to pass to the S3 client. Includes: ``expiration``.
"""
self.root = root
Expand All @@ -132,7 +134,7 @@ def __init__(
self.tag: Optional[str] = None
self.token: Optional[str] = token
self.loaded_creds_from_environment = False
self.client_config = deeplake.config["s3"]
self.client_config = config
self.start_time = time.time()
self.profile_name = profile_name
self._initialize_s3_parameters()
Expand Down Expand Up @@ -234,17 +236,16 @@ def __getitem__(self, path):
def _get_bytes(
    self, path, start_byte: Optional[int] = None, end_byte: Optional[int] = None
):
    """Read an object, or a byte window of it, from the bucket.

    Args:
        path: Key of the object inside ``self.bucket``.
        start_byte (int, optional): Index of the first byte to read. When
            omitted the read starts at byte 0.
        end_byte (int, optional): Index one past the last byte to read
            (exclusive). When omitted the read runs to the end of the object.

    Returns:
        bytes: The requested bytes. ``b""`` is returned without contacting
        the client when the requested window is empty.
    """
    # The ``Range`` argument is only forwarded when a window was actually
    # requested; otherwise it is left out of the call entirely instead of
    # being sent as an empty string.
    range_kwarg = {}
    if end_byte is not None:
        first_byte = 0 if start_byte is None else start_byte
        if first_byte == end_byte:
            # Empty window. This also covers ``start_byte=None, end_byte=0``,
            # which would otherwise produce the invalid spec ``bytes=0--1``.
            return b""
        # HTTP byte ranges are inclusive on both ends, hence ``end_byte - 1``.
        range_kwarg["Range"] = f"bytes={first_byte}-{end_byte - 1}"
    elif start_byte is not None:
        # Open-ended range: from ``start_byte`` to the end of the object.
        range_kwarg["Range"] = f"bytes={start_byte}-"
    resp = self.client.get_object(Bucket=self.bucket, Key=path, **range_kwarg)
    return resp["Body"].read()

def get_bytes(
Expand Down
2 changes: 2 additions & 0 deletions deeplake/util/storage.py
Expand Up @@ -84,6 +84,7 @@ def storage_provider_from_path(
session_token = creds.get("aws_session_token")
endpoint_url = creds.get("endpoint_url")
region = creds.get("aws_region") or creds.get("region")
config = creds.get("config", None) or deeplake.config["s3"]
profile = creds.get("profile_name")
storage = S3Provider(
path,
Expand All @@ -94,6 +95,7 @@ def storage_provider_from_path(
region,
profile_name=profile,
token=token,
config=config,
)
storage.creds_used = creds_used
else:
Expand Down

0 comments on commit 2f22e5f

Please sign in to comment.