Merge pull request #2579 from activeloopai/add_offset
Added .offset parameter to enterprise dataloader
levongh committed Sep 6, 2023
2 parents cd773dc + 7ff62dd commit 1169406
Showing 22 changed files with 238 additions and 171 deletions.
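For context, the commit adds an offset option to the enterprise (libdeeplake) dataloader so iteration can start partway into a dataset. A minimal usage sketch, assuming the builder-style .offset() method name implied by the commit title and a placeholder dataset path:

import deeplake

ds = deeplake.load("./my_dataset")  # placeholder path

# .offset(100) is assumed to skip the first 100 samples before iteration;
# the exact name and semantics are inferred from the commit title, not from API docs.
loader = (
    ds.dataloader()
    .offset(100)
    .batch(32)
    .pytorch(num_workers=2)
)

for batch in loader:
    pass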
5 changes: 4 additions & 1 deletion deeplake/__init__.py
@@ -69,8 +69,11 @@
"deepcopy",
"like",
"list",
"ingest",
"ingest_classification",
"ingest_coco",
"ingest_yolo",
"ingest_kaggle",
"ingest_dataframe",
"ingest_huggingface",
"compressions",
"htypes",
6 changes: 3 additions & 3 deletions deeplake/api/tests/test_api.py
@@ -90,7 +90,6 @@ def test_persist(ds_generator):

ds2 = ds_generator()

- ds2.storage["dataset_meta.json"] == ds_new.storage["dataset_meta.json"]
assert len(ds2) == 4
assert_array_equal(ds2.label.numpy(), np.array([[1], [2], [3], [4]]))

@@ -983,7 +982,6 @@ def test_dataset_deepcopy(path, hub_token, num_workers, progressbar):
dest_path = "_".join((path, "dest1"))

src_ds = deeplake.empty(src_path, overwrite=True, token=hub_token)
- # dest_ds = deeplake.empty(dest_path, overwrite=True, token=hub_token)

with src_ds:
src_ds.info.update(key=0)
@@ -1922,7 +1920,9 @@ def test_dataset_copy(
[
("local_ds_generator", "local_path", "hub_cloud_dev_token"),
pytest.param(
"s3_ds_generator", "s3_path", "hub_cloud_dev_token",
"s3_ds_generator",
"s3_path",
"hub_cloud_dev_token",
marks=pytest.mark.slow,
),
pytest.param(
2 changes: 0 additions & 2 deletions deeplake/api/tests/test_info.py
@@ -130,8 +130,6 @@ def test_update_reference_manually(local_ds_generator):
l.append(99)
ds.info.update()

- ds = local_ds_generator()
-
assert l == [1, 2, 3, 99]


2 changes: 0 additions & 2 deletions deeplake/api/tests/test_meta.py
@@ -35,8 +35,6 @@ def test_subsequent_updates(local_ds_generator):
assert len(ds) == 10
assert ds.tensor.shape == (10, 100, 100)

- ds = local_ds_generator()
-
with local_ds_generator() as ds:
for _ in range(5):
ds.tensor.append(np.ones((100, 200)))
14 changes: 7 additions & 7 deletions deeplake/api/tests/test_reset.py
@@ -54,10 +54,10 @@ def test_load_corrupt_dataset(path):
save_head = ds.pending_commit_id

with pytest.raises(DatasetCorruptError):
- ds = deeplake.load(path, access_method=access_method)
+ deeplake.load(path, access_method=access_method)

with pytest.raises(ReadOnlyModeError):
- ds = deeplake.load(
+ deeplake.load(
path, read_only=True, access_method=access_method, reset=True
)

@@ -116,7 +116,7 @@ def test_load_corrupted_branch(local_path):
save_head = ds.pending_commit_id

with pytest.raises(DatasetCorruptError):
ds = deeplake.load(f"{local_path}@alt")
deeplake.load(f"{local_path}@alt")

ds = deeplake.load(f"{local_path}@alt", reset=True)
verify_reset_on_checkout(ds, "alt", main_2, save_head, {"abc": [[1], [2]]})
@@ -131,10 +131,10 @@ def test_load_corrupted_branch(local_path):
save_head = ds.pending_commit_id

with pytest.raises(DatasetCorruptError):
ds = deeplake.load(f"{local_path}@alt")
deeplake.load(f"{local_path}@alt")

with pytest.raises(DatasetCorruptError):
ds = deeplake.load(f"{local_path}@{save_head}")
deeplake.load(f"{local_path}@{save_head}")

ds = deeplake.load(f"{local_path}@alt", reset=True)
verify_reset_on_checkout(ds, "alt", alt_2, save_head, {"abc": [[1], [2], [3], [4]]})
@@ -200,10 +200,10 @@ def test_load_corrupt_dataset_with_no_commits(local_path):
corrupt_ds(ds, "abc", 1)

with pytest.raises(DatasetCorruptError):
- ds = deeplake.load(local_path)
+ deeplake.load(local_path)

with pytest.raises(ReadOnlyModeError):
- ds = deeplake.load(local_path, read_only=True, reset=True)
+ deeplake.load(local_path, read_only=True, reset=True)

ds = deeplake.load(local_path, reset=True)

5 changes: 2 additions & 3 deletions deeplake/api/tests/test_update_samples.py
@@ -51,9 +51,8 @@ def _make_update_assert_equal(
# this is necessary because `expected` uses `aslist=True` to handle dynamic cases.
# with `aslist=False`, this wouldn't be necessary.
expected_value = value
if hasattr(value, "__len__"):
if len(value) == 1:
expected_value = value[0]
if hasattr(value, "__len__") and len(value) == 1:
expected_value = value[0]

# make updates
tensor[index] = value
4 changes: 2 additions & 2 deletions deeplake/api/tests/test_video.py
@@ -111,7 +111,7 @@ def test_video_timestamps(vstream_path, hub_token):
ds = deeplake.load(vstream_path, read_only=True, token=hub_token)

with pytest.raises(ValueError):
- stamps = ds.mp4_videos[:2].timestamps
+ ds.mp4_videos[:2].timestamps

stamps = ds.large_video[0, 12000:1199:-100].timestamps

@@ -131,7 +131,7 @@ def test_video_exception(local_ds):
with local_ds as ds:
ds.create_tensor("abc")
with pytest.raises(Exception):
- stamps = ds.abc.timestamps
+ ds.abc.timestamps


@pytest.mark.skipif(
2 changes: 1 addition & 1 deletion deeplake/auto/structured/dataframe.py
@@ -58,7 +58,7 @@ def _get_most_frequent_image_extension(self, fn_iterator):

if len(fn_iterator) == 0:
raise IngestionError(
f"Cannot determine the most frequent image compression because no valid image files were provided."
"Cannot determine the most frequent image compression because no valid image files were provided."
)

supported_image_extensions = tuple(
2 changes: 1 addition & 1 deletion deeplake/cli/test_cli.py
@@ -27,5 +27,5 @@ def test_cli_auth(hub_cloud_dev_credentials, hub_cloud_dev_token, method):
def test_bad_token():
runner = CliRunner()

result = runner.invoke(login, f"-t abcd")
result = runner.invoke(login, "-t abcd")
assert isinstance(result.exception, LoginException)
3 changes: 2 additions & 1 deletion deeplake/constants.py
@@ -159,7 +159,8 @@
"gcp://",
"gs://",
"az://",
"azure://" "gdrive://",
"azure://",
"gdrive://",
)

_ENABLE_HUB_SUB_DATASETS = False
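A note on the constants.py hunk above: the missing comma made Python join the two adjacent string literals at compile time, so the tuple of supported path prefixes contained a single bogus entry instead of two. A small illustration (variable names are made up for the example):

# Adjacent string literals are concatenated into one, silently dropping a prefix.
broken = ("az://", "azure://" "gdrive://")
fixed = ("az://", "azure://", "gdrive://")

print(broken)  # ('az://', 'azure://gdrive://') -> "gdrive://" never matches
print(fixed)   # ('az://', 'azure://', 'gdrive://')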
2 changes: 0 additions & 2 deletions deeplake/core/compression.py
@@ -157,8 +157,6 @@ def compress_bytes(
if not buffer:
return b""
if compression == "lz4":
- if not buffer:
-     return b""
return numcodecs.lz4.compress(buffer)
else:
raise SampleCompressionError(
2 changes: 1 addition & 1 deletion deeplake/core/dataset/dataset.py
@@ -1941,7 +1941,7 @@ def _send_branch_creation_event(self, *args, **kwargs):
def _send_branch_deletion_event(self, *args, **kwargs):
"""overridden in DeepLakeCloudDataset"""

- def _first_load_init(self):
+ def _first_load_init(self, verbose=True):
"""overridden in DeepLakeCloudDataset"""

@property
6 changes: 3 additions & 3 deletions deeplake/core/tensor.py
@@ -1350,7 +1350,7 @@ def dict(self, fetch_chunks: bool = False):
def list(self, fetch_chunks: bool = False):
"""Return list data. Only applicable for tensors with 'list' base htype."""
if self.base_htype != "list":
raise Exception(f"Only supported for list tensors.")
raise Exception("Only supported for list tensors.")

if self.ndim == 1:
return list(self.numpy(fetch_chunks=fetch_chunks))
@@ -1360,14 +1360,14 @@ def list(self, fetch_chunks: bool = False):
def path(self, fetch_chunks: bool = False):
"""Return path data. Only applicable for linked tensors"""
if not self.is_link:
raise Exception(f"Only supported for linked tensors.")
raise Exception("Only supported for linked tensors.")
assert isinstance(self.chunk_engine, LinkedChunkEngine)
return self.chunk_engine.path(self.index, fetch_chunks=fetch_chunks)

def creds_key(self):
"""Return path data. Only applicable for linked tensors"""
if not self.is_link:
raise Exception(f"Only supported for linked tensors.")
raise Exception("Only supported for linked tensors.")
if self.index.values[0].subscriptable() or len(self.index.values) > 1:
raise ValueError("_linked_sample can be used only on exatcly 1 sample.")
assert isinstance(self.chunk_engine, LinkedChunkEngine)
2 changes: 1 addition & 1 deletion deeplake/core/transform/test_transform.py
@@ -138,7 +138,7 @@ def add_image(sample_in, samples_out):

@deeplake.compute
def add_images(i, sample_out):
- for i in range(5):
+ for _ in range(5):
image = deeplake.read(get_dummy_data_path("images/flower.png"))
sample_out.append({"image": image})

2 changes: 1 addition & 1 deletion deeplake/core/transform/transform.py
@@ -323,7 +323,7 @@ def my_fn(sample_in: Any, samples_out, my_arg0, my_arg1=0):
index=index,
sample=sample,
samples_processed=samples_processed,
suggest=suggest,
suggest=suggest,
) from e
finally:
reload_and_rechunk(
7 changes: 3 additions & 4 deletions deeplake/enterprise/convert_to_libdeeplake.py
@@ -211,8 +211,7 @@ def dataset_to_libdeeplake(hub2_dataset):
commit_id = hub2_dataset.pending_commit_id
libdeeplake_dataset.checkout(commit_id)
slice_ = hub2_dataset.index.values[0].value
- if slice_ != slice(None):
-     if isinstance(slice_, tuple):
-         slice_ = list(slice_)
-     libdeeplake_dataset = libdeeplake_dataset[slice_]
+ if slice_ != slice(None) and isinstance(slice_, tuple):
+     slice_ = list(slice_)
+ libdeeplake_dataset = libdeeplake_dataset[slice_]
return libdeeplake_dataset
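The tuple-to-list conversion in this hunk matters because, in array-style indexing, a tuple and a list mean different things: a tuple is read as multi-dimensional indexing, while a list selects rows. A hedged illustration of that distinction using numpy (not libdeeplake itself):

import numpy as np

data = np.arange(12).reshape(4, 3)
rows = (0, 2)

print(data[list(rows)])  # rows 0 and 2 -> shape (2, 3)
print(data[rows])        # single element at position [0, 2] -> 2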
