DEEP-10: Add check for indra=True, to ensure it's used with read_only=True. (#2826)

* Add check for indra=True, to ensure it's used with read_only=True.
* Fixed sample info for indra tensor.
* Bump libdeeplake version.
khustup2 committed Apr 19, 2024
1 parent 3e3990f commit 32f4ff5
Showing 4 changed files with 25 additions and 12 deletions.
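
In practice, the new guard means indra=True must be paired with an explicit read_only=True; the read_only=None default is rejected as well, so implicit write access can never reach the indra reader. A minimal caller-side sketch (the dataset path is hypothetical; the error text matches the check added below):

    import deeplake

    # OK: the indra (libdeeplake) reader is only supported for read-only access.
    ds = deeplake.load("hub://org/my_dataset", indra=True, read_only=True)

    # Raises ValueError: 'indra = True' is only available for read_only datasets.
    # Please also specify 'read_only = True'.
    ds = deeplake.load("hub://org/my_dataset", indra=True)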
22 changes: 12 additions & 10 deletions deeplake/api/dataset.py
@@ -81,6 +81,16 @@
 from deeplake.core.storage.deeplake_memory_object import DeepLakeMemoryObject
 
 
+def _check_indra_and_read_only_flags(indra: bool, read_only: Optional[bool]):
+    if indra == False:
+        return
+    if read_only == True:
+        return
+    raise ValueError(
+        "'indra = True' is only available for read_only datasets. Please also specify 'read_only = True'."
+    )
+
+
 class dataset:
     @staticmethod
     @spinner
@@ -206,6 +216,7 @@ def init(
         Note:
             Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
         """
+        _check_indra_and_read_only_flags(indra, read_only)
         access_method, num_workers, scheduler = parse_access_method(access_method)
         check_access_method(access_method, overwrite, unlink)
 
@@ -383,7 +394,6 @@ def empty(
         lock_timeout: Optional[int] = 0,
         verbose: bool = True,
         index_params: Optional[Dict[str, Union[int, str]]] = None,
-        indra: bool = USE_INDRA,
     ) -> Dataset:
         """Creates an empty dataset
 
@@ -408,7 +418,6 @@ def empty(
             lock_timeout (int): Number of seconds to wait before throwing a LockException. If None, wait indefinitely
             lock_enabled (bool): If true, the dataset manages a write lock. NOTE: Only set to False if you are managing concurrent access externally.
             index_params: Optional[Dict[str, Union[int, str]]]: Index parameters used while creating vector store, passed down to dataset.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
 
         Returns:
             Dataset: Dataset created using the arguments provided.
@@ -448,7 +457,6 @@ def empty(
             token=token,
             memory_cache_size=memory_cache_size,
             local_cache_size=local_cache_size,
-            indra=indra,
         )
 
         feature_report_path(
@@ -615,6 +623,7 @@ def load(
         Note:
             Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
         """
+        _check_indra_and_read_only_flags(indra, read_only)
         access_method, num_workers, scheduler = parse_access_method(access_method)
         check_access_method(access_method, overwrite=False, unlink=unlink)
 
@@ -1508,7 +1517,6 @@ def ingest_coco(
         num_workers: int = 0,
         token: Optional[str] = None,
         connect_kwargs: Optional[Dict] = None,
-        indra: bool = USE_INDRA,
         **dataset_kwargs,
     ) -> Dataset:
         """Ingest images and annotations in COCO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1562,7 +1570,6 @@ def ingest_coco(
             num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
             token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
             connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect <deeplake.core.dataset.Dataset.connect>`.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
             **dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
 
         Returns:
@@ -1605,7 +1612,6 @@ def ingest_coco(
             creds=dest_creds,
             verbose=False,
             token=token,
-            indra=indra,
             **dataset_kwargs,
         )
         if connect_kwargs is not None:
@@ -1637,7 +1643,6 @@ def ingest_yolo(
         num_workers: int = 0,
         token: Optional[str] = None,
         connect_kwargs: Optional[Dict] = None,
-        indra: bool = USE_INDRA,
         **dataset_kwargs,
     ) -> Dataset:
         """Ingest images and annotations (bounding boxes or polygons) in YOLO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1686,7 +1691,6 @@ def ingest_yolo(
             num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
             token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
             connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect <deeplake.core.dataset.Dataset.connect>`.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
             **dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
 
         Returns:
@@ -1738,7 +1742,6 @@ def ingest_yolo(
             creds=dest_creds,
             verbose=False,
             token=token,
-            indra=indra,
             **dataset_kwargs,
         )
         if connect_kwargs is not None:
@@ -1899,7 +1902,6 @@ def ingest_classification(
             creds=dest_creds,
             token=token,
             verbose=False,
-            indra=indra,
             **dataset_kwargs,
         )
         if connect_kwargs is not None:
12 changes: 11 additions & 1 deletion deeplake/core/dataset/indra_tensor_view.py
@@ -1,6 +1,6 @@
 import deeplake.util.shape_interval as shape_interval
 from deeplake.core import tensor
-from typing import List, Union, Optional
+from typing import Dict, List, Union, Optional
 from deeplake.core.index import Index
 from deeplake.core.tensor import Any
 import numpy as np
@@ -151,6 +151,16 @@ def index(self):
         except:
             return Index(slice(0, len(self)))
 
+    @property
+    def sample_info(self):
+        try:
+            r = self.indra_tensor.sample_info
+            if not self.index.values[0].subscriptable():
+                r = r[0]
+            return r
+        except:
+            return None
+
     @property
     def shape_interval(self):
         return shape_interval.ShapeInterval(
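
The new property mirrors the regular tensor's sample_info behavior: for a single-sample view (whose first index entry is not subscriptable) it unwraps the list to the one entry, and it degrades to None instead of raising if the underlying indra tensor cannot provide the info. A hedged illustration of the intended behavior (names follow the test change below; no new API is assumed):

    # Full tensor view: per-sample info for every sample.
    infos = deeplake_indra_ds.image.sample_info

    # Single-sample view: unwraps to that sample's info, matching
    # deeplake_ds.image[0].sample_info on the regular dataset.
    info = deeplake_indra_ds.image[0].sample_info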
1 change: 1 addition & 0 deletions deeplake/core/tests/test_indra_dataset.py
@@ -172,6 +172,7 @@ def test_query(local_auth_ds_generator):
 
     indra_ds = dataset_to_libdeeplake(deeplake_ds)
     deeplake_indra_ds = IndraDatasetView(indra_ds=indra_ds)
+    assert deeplake_indra_ds.image.sample_info == deeplake_ds.image.sample_info
 
     view = deeplake_indra_ds.query("SELECT * GROUP BY label")
     assert len(view) == 10
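
To exercise the new assertion locally, one plausible invocation (standard pytest usage; assumes an environment with libdeeplake available, as the test requires):

    pytest deeplake/core/tests/test_indra_dataset.py -k test_query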
2 changes: 1 addition & 1 deletion setup.py
@@ -70,7 +70,7 @@ def libdeeplake_available():
 extras_require["all"] = [req_map[r] for r in all_extras]
 
 if libdeeplake_available():
-    libdeeplake = "libdeeplake==0.0.118"
+    libdeeplake = "libdeeplake==0.0.119"
     extras_require["enterprise"] = [libdeeplake, "pyjwt"]
     extras_require["all"].append(libdeeplake)
     install_requires.append(libdeeplake)
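
The bumped native dependency is pulled in through the enterprise extra, so a plausible way to pick up 0.0.119 (assuming wheels for your platform are published):

    pip install --upgrade "deeplake[enterprise]"
    # or pin the native library directly:
    pip install "libdeeplake==0.0.119"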
