diff --git a/deeplake/api/dataset.py b/deeplake/api/dataset.py
index 5706ee2656..4c9ee5ae3d 100644
--- a/deeplake/api/dataset.py
+++ b/deeplake/api/dataset.py
@@ -81,6 +81,16 @@
 from deeplake.core.storage.deeplake_memory_object import DeepLakeMemoryObject
 
 
+def _check_indra_and_read_only_flags(indra: bool, read_only: Optional[bool]):
+    if indra == False:
+        return
+    if read_only == True:
+        return
+    raise ValueError(
+        "'indra = True' is only available for read_only datasets. Please also specify 'read_only = True'."
+    )
+
+
 class dataset:
     @staticmethod
     @spinner
@@ -206,6 +216,7 @@ def init(
         Note:
             Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
         """
+        _check_indra_and_read_only_flags(indra, read_only)
         access_method, num_workers, scheduler = parse_access_method(access_method)
         check_access_method(access_method, overwrite, unlink)
 
@@ -383,7 +394,6 @@ def empty(
         lock_timeout: Optional[int] = 0,
         verbose: bool = True,
         index_params: Optional[Dict[str, Union[int, str]]] = None,
-        indra: bool = USE_INDRA,
     ) -> Dataset:
         """Creates an empty dataset
 
@@ -408,7 +418,6 @@ def empty(
            lock_timeout (int): Number of seconds to wait before throwing a LockException. If None, wait indefinitely
            lock_enabled (bool): If true, the dataset manages a write lock. NOTE: Only set to False if you are managing concurrent access externally.
            index_params: Optional[Dict[str, Union[int, str]]]: Index parameters used while creating vector store, passed down to dataset.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
 
        Returns:
            Dataset: Dataset created using the arguments provided.
@@ -448,7 +457,6 @@ def empty(
                token=token,
                memory_cache_size=memory_cache_size,
                local_cache_size=local_cache_size,
-                indra=indra,
            )
 
        feature_report_path(
@@ -615,6 +623,7 @@ def load(
         Note:
             Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
         """
+        _check_indra_and_read_only_flags(indra, read_only)
         access_method, num_workers, scheduler = parse_access_method(access_method)
         check_access_method(access_method, overwrite=False, unlink=unlink)
 
@@ -1508,7 +1517,6 @@ def ingest_coco(
         num_workers: int = 0,
         token: Optional[str] = None,
         connect_kwargs: Optional[Dict] = None,
-        indra: bool = USE_INDRA,
         **dataset_kwargs,
     ) -> Dataset:
         """Ingest images and annotations in COCO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1562,7 +1570,6 @@ def ingest_coco(
            num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
            token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
            connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect `.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
            **dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
 
        Returns:
@@ -1605,7 +1612,6 @@ def ingest_coco(
            creds=dest_creds,
            verbose=False,
            token=token,
-            indra=indra,
            **dataset_kwargs,
        )
        if connect_kwargs is not None:
@@ -1637,7 +1643,6 @@ def ingest_yolo(
         num_workers: int = 0,
         token: Optional[str] = None,
         connect_kwargs: Optional[Dict] = None,
-        indra: bool = USE_INDRA,
         **dataset_kwargs,
     ) -> Dataset:
         """Ingest images and annotations (bounding boxes or polygons) in YOLO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1686,7 +1691,6 @@ def ingest_yolo(
            num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
            token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
            connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect `.
-            indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
            **dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
 
        Returns:
@@ -1738,7 +1742,6 @@ def ingest_yolo(
            creds=dest_creds,
            verbose=False,
            token=token,
-            indra=indra,
            **dataset_kwargs,
        )
        if connect_kwargs is not None:
@@ -1899,7 +1902,6 @@ def ingest_classification(
            creds=dest_creds,
            token=token,
            verbose=False,
-            indra=indra,
            **dataset_kwargs,
        )
        if connect_kwargs is not None:
diff --git a/deeplake/core/dataset/indra_tensor_view.py b/deeplake/core/dataset/indra_tensor_view.py
index 63e748f1ff..273ee906df 100644
--- a/deeplake/core/dataset/indra_tensor_view.py
+++ b/deeplake/core/dataset/indra_tensor_view.py
@@ -1,6 +1,6 @@
 import deeplake.util.shape_interval as shape_interval
 from deeplake.core import tensor
-from typing import List, Union, Optional
+from typing import Dict, List, Union, Optional
 from deeplake.core.index import Index
 from deeplake.core.tensor import Any
 import numpy as np
@@ -151,6 +151,16 @@ def index(self):
         except:
             return Index(slice(0, len(self)))
 
+    @property
+    def sample_info(self):
+        try:
+            r = self.indra_tensor.sample_info
+            if not self.index.values[0].subscriptable():
+                r = r[0]
+            return r
+        except:
+            return None
+
     @property
     def shape_interval(self):
         return shape_interval.ShapeInterval(
diff --git a/deeplake/core/tests/test_indra_dataset.py b/deeplake/core/tests/test_indra_dataset.py
index 2da1204be1..8759d0b50e 100644
--- a/deeplake/core/tests/test_indra_dataset.py
+++ b/deeplake/core/tests/test_indra_dataset.py
@@ -172,6 +172,7 @@ def test_query(local_auth_ds_generator):
     indra_ds = dataset_to_libdeeplake(deeplake_ds)
     deeplake_indra_ds = IndraDatasetView(indra_ds=indra_ds)
 
+    assert deeplake_indra_ds.image.sample_info == deeplake_ds.image.sample_info
     view = deeplake_indra_ds.query("SELECT * GROUP BY label")
 
     assert len(view) == 10
diff --git a/setup.py b/setup.py
index 1b2e06755f..f2e8497569 100644
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,7 @@ def libdeeplake_available():
 extras_require["all"] = [req_map[r] for r in all_extras]
 
 if libdeeplake_available():
-    libdeeplake = "libdeeplake==0.0.118"
+    libdeeplake = "libdeeplake==0.0.119"
     extras_require["enterprise"] = [libdeeplake, "pyjwt"]
     extras_require["all"].append(libdeeplake)
     install_requires.append(libdeeplake)