DEEP-10: Add check for indra=True, to ensure it's used with read_only=True. (#2826)

* Add check for indra=True, to ensure it's used with read_only=True.
* Fixed sample info for indra tensor.
* Bump libdeeplake version.
khustup2 committed Apr 19, 2024
1 parent 3e3990f commit 32f4ff5
Showing 4 changed files with 25 additions and 12 deletions.
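
In practice, the new guard means indra=True must be paired with an explicit read_only=True; the read_only=None default is rejected as well, so implicit write access can never reach the indra reader. A minimal caller-side sketch (the dataset path is hypothetical; the error text matches the check added below):

    import deeplake

    # OK: the indra (libdeeplake) reader is only supported for read-only access.
    ds = deeplake.load("hub://org/my_dataset", indra=True, read_only=True)

    # Raises ValueError: 'indra = True' is only available for read_only datasets.
    # Please also specify 'read_only = True'.
    ds = deeplake.load("hub://org/my_dataset", indra=True)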
22 changes: 12 additions & 10 deletions deeplake/api/dataset.py
@@ -81,6 +81,16 @@
 from deeplake.core.storage.deeplake_memory_object import DeepLakeMemoryObject
 
 
+def _check_indra_and_read_only_flags(indra: bool, read_only: Optional[bool]):
+    if indra == False:
+        return
+    if read_only == True:
+        return
+    raise ValueError(
+        "'indra = True' is only available for read_only datasets. Please also specify 'read_only = True'."
+    )
+
+
 class dataset:
     @staticmethod
     @spinner
@@ -206,6 +216,7 @@ def init(
         Note:
             Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
         """
+        _check_indra_and_read_only_flags(indra, read_only)
         access_method, num_workers, scheduler = parse_access_method(access_method)
         check_access_method(access_method, overwrite, unlink)
 
@@ -383,7 +394,6 @@ def empty(
         lock_timeout: Optional[int] = 0,
         verbose: bool = True,
         index_params: Optional[Dict[str, Union[int, str]]] = None,
-        indra: bool = USE_INDRA,
     ) -> Dataset:
         """Creates an empty dataset
 
@@ -408,7 +418,6 @@ def empty(
             lock_timeout (int): Number of seconds to wait before throwing a LockException. If None, wait indefinitely
             lock_enabled (bool): If true, the dataset manages a write lock. NOTE: Only set to False if you are managing concurrent access externally.
             index_params: Optional[Dict[str, Union[int, str]]]: Index parameters used while creating vector store, passed down to dataset.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
 
         Returns:
             Dataset: Dataset created using the arguments provided.
@@ -448,7 +457,6 @@ def empty(
             token=token,
             memory_cache_size=memory_cache_size,
             local_cache_size=local_cache_size,
-            indra=indra,
         )
 
         feature_report_path(
@@ -615,6 +623,7 @@ def load(
         Note:
             Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
         """
+        _check_indra_and_read_only_flags(indra, read_only)
         access_method, num_workers, scheduler = parse_access_method(access_method)
         check_access_method(access_method, overwrite=False, unlink=unlink)
 
@@ -1508,7 +1517,6 @@ def ingest_coco(
         num_workers: int = 0,
         token: Optional[str] = None,
         connect_kwargs: Optional[Dict] = None,
-        indra: bool = USE_INDRA,
         **dataset_kwargs,
     ) -> Dataset:
         """Ingest images and annotations in COCO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1562,7 +1570,6 @@ def ingest_coco(
             num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
             token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
             connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect <deeplake.core.dataset.Dataset.connect>`.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
             **dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
 
         Returns:
@@ -1605,7 +1612,6 @@ def ingest_coco(
             creds=dest_creds,
             verbose=False,
             token=token,
-            indra=indra,
             **dataset_kwargs,
         )
         if connect_kwargs is not None:
@@ -1637,7 +1643,6 @@ def ingest_yolo(
         num_workers: int = 0,
         token: Optional[str] = None,
         connect_kwargs: Optional[Dict] = None,
-        indra: bool = USE_INDRA,
         **dataset_kwargs,
     ) -> Dataset:
         """Ingest images and annotations (bounding boxes or polygons) in YOLO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1686,7 +1691,6 @@ def ingest_yolo(
             num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
             token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
             connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect <deeplake.core.dataset.Dataset.connect>`.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
             **dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
 
         Returns:
@@ -1738,7 +1742,6 @@ def ingest_yolo(
             creds=dest_creds,
             verbose=False,
             token=token,
-            indra=indra,
             **dataset_kwargs,
         )
         if connect_kwargs is not None:
@@ -1899,7 +1902,6 @@ def ingest_classification(
             creds=dest_creds,
             token=token,
             verbose=False,
-            indra=indra,
             **dataset_kwargs,
         )
         if connect_kwargs is not None:
12 changes: 11 additions & 1 deletion deeplake/core/dataset/indra_tensor_view.py
@@ -1,6 +1,6 @@
 import deeplake.util.shape_interval as shape_interval
 from deeplake.core import tensor
-from typing import List, Union, Optional
+from typing import Dict, List, Union, Optional
 from deeplake.core.index import Index
 from deeplake.core.tensor import Any
 import numpy as np
@@ -151,6 +151,16 @@ def index(self):
         except:
             return Index(slice(0, len(self)))
 
+    @property
+    def sample_info(self):
+        try:
+            r = self.indra_tensor.sample_info
+            if not self.index.values[0].subscriptable():
+                r = r[0]
+            return r
+        except:
+            return None
+
     @property
     def shape_interval(self):
         return shape_interval.ShapeInterval(
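
The new property mirrors the regular tensor's sample_info behavior: for a single-sample view (whose first index entry is not subscriptable) it unwraps the list to the one entry, and it degrades to None instead of raising if the underlying indra tensor cannot provide the info. A hedged illustration of the intended behavior (names follow the test change below; no new API is assumed):

    # Full tensor view: per-sample info for every sample.
    infos = deeplake_indra_ds.image.sample_info

    # Single-sample view: unwraps to that sample's info, matching
    # deeplake_ds.image[0].sample_info on the regular dataset.
    info = deeplake_indra_ds.image[0].sample_info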
1 change: 1 addition & 0 deletions deeplake/core/tests/test_indra_dataset.py
@@ -172,6 +172,7 @@ def test_query(local_auth_ds_generator):
 
     indra_ds = dataset_to_libdeeplake(deeplake_ds)
     deeplake_indra_ds = IndraDatasetView(indra_ds=indra_ds)
+    assert deeplake_indra_ds.image.sample_info == deeplake_ds.image.sample_info
 
     view = deeplake_indra_ds.query("SELECT * GROUP BY label")
     assert len(view) == 10
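
To exercise the new assertion locally, one plausible invocation (standard pytest usage; assumes an environment with libdeeplake available, as the test requires):

    pytest deeplake/core/tests/test_indra_dataset.py -k test_query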
2 changes: 1 addition & 1 deletion setup.py
@@ -70,7 +70,7 @@ def libdeeplake_available():
 extras_require["all"] = [req_map[r] for r in all_extras]
 
 if libdeeplake_available():
-    libdeeplake = "libdeeplake==0.0.118"
+    libdeeplake = "libdeeplake==0.0.119"
     extras_require["enterprise"] = [libdeeplake, "pyjwt"]
     extras_require["all"].append(libdeeplake)
     install_requires.append(libdeeplake)
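
The bumped native dependency is pulled in through the enterprise extra, so a plausible way to pick up 0.0.119 (assuming wheels for your platform are published):

    pip install --upgrade "deeplake[enterprise]"
    # or pin the native library directly:
    pip install "libdeeplake==0.0.119"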
