Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New vectorstore docs #2644

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 4 additions & 8 deletions deeplake/core/vectorstore/deep_memory.py
Expand Up @@ -192,7 +192,6 @@ def status(self, job_id: str):

Examples:
>>> vectorstore.deep_memory.status(job_id)

--------------------------------------------------------------
| 6508464cd80cab681bfcfff3 |
--------------------------------------------------------------
Expand Down Expand Up @@ -281,7 +280,7 @@ def evaluate(
"""Evaluate a model on DeepMemory managed service.

Examples:
# Evaluate a model with embedding function
>>> #1. Evaluate a model with embedding function
>>> relevance: List[List[Tuple[str, int]]] = [[("doc_id_1", 1), ("doc_id_2", 1)], [("doc_id_3", 1)]]
>>> # doc_id_1, doc_id_2, doc_id_3 are the ids of the documents in the corpus dataset that is relevant to the queries. It is stored in the `id` tensor of the corpus dataset.
>>> queries: List[str] = ["What is the capital of India?", "What is the capital of France?"]
Expand All @@ -291,8 +290,7 @@ def evaluate(
... queries=queries,
... embedding_function=embedding_function,
... )

# Evaluate a model with precomputed embeddings
>>> #2. Evaluate a model with precomputed embeddings
>>> relevance: List[List[Tuple[str, int]]] = [[("doc_id_1", 1), ("doc_id_2", 1)], [("doc_id_3", 1)]]
>>> # doc_id_1, doc_id_2, doc_id_3 are the ids of the documents in the corpus dataset that is relevant to the queries. It is stored in the `id` tensor of the corpus dataset.
>>> queries: List[str] = ["What is the capital of India?", "What is the capital of France?"]
Expand All @@ -302,8 +300,7 @@ def evaluate(
... queries=queries,
... embedding=embedding,
... )

# Evaluate a model with precomputed embeddings and log queries
>>> #3. Evaluate a model with precomputed embeddings and log queries
>>> relevance: List[List[Tuple[str, int]]] = [[("doc_id_1", 1), ("doc_id_2", 1)], [("doc_id_3", 1)]]
>>> # doc_id_1, doc_id_2, doc_id_3 are the ids of the documents in the corpus dataset that is relevant to the queries. It is stored in the `id` tensor of the corpus dataset.
>>> queries: List[str] = ["What is the capital of India?", "What is the capital of France?"]
Expand All @@ -316,8 +313,7 @@ def evaluate(
... "log_queries": True,
... }
... )

# Evaluate a model with precomputed embeddings and log queries, and custom branch
>>> #4. Evaluate a model with precomputed embeddings and log queries, and custom branch
>>> relevance: List[List[Tuple[str, int]]] = [[("doc_id_1", 1), ("doc_id_2", 1)], [("doc_id_3", 1)]]
>>> # doc_id_1, doc_id_2, doc_id_3 are the ids of the documents in the corpus dataset that is relevant to the queries. It is stored in the `id` tensor of the corpus dataset.
>>> queries: List[str] = ["What is the capital of India?", "What is the capital of France?"]
Expand Down
15 changes: 0 additions & 15 deletions deeplake/core/vectorstore/deeplake_vectorstore.py
Expand Up @@ -64,13 +64,11 @@ def __init__(
>>> data = VectorStore(
... path = "./my_vector_store",
... )

>>> # Create a vector store in the Deep Lake Managed Tensor Database
>>> data = VectorStore(
... path = "hub://org_id/dataset_name",
... runtime = {"tensor_db": True},
... )

>>> # Create a vector store with custom tensors
>>> data = VectorStore(
... path = "./my_vector_store",
Expand Down Expand Up @@ -233,22 +231,19 @@ def add(
>>> metadatas = [{"timestamp": "01:20"}, {"timestamp": "01:22"}]
>>> embedding_fn = lambda x: [[1, 2, 3]] * len(x)
>>> embedding_fn_2 = lambda x: [[4, 5]] * len(x)

>>> # Directly upload embeddings
>>> deeplake_vector_store.add(
... text = texts,
... embedding = embeddings,
... metadata = metadatas,
... )

>>> # Upload embedding via embedding function
>>> deeplake_vector_store.add(
... text = texts,
... metadata = metadatas,
... embedding_function = embedding_fn,
... embedding_data = texts,
... )

>>> # Upload embedding via embedding function to a user-defined embedding tensor
>>> deeplake_vector_store.add(
... text = texts,
Expand All @@ -257,22 +252,19 @@ def add(
... embedding_data = texts,
... embedding_tensor = "embedding_1",
... )

>>> # Multiple embedding functions (user defined embedding tensors must be specified)
>>> deeplake_vector_store.add(
... embedding_tensor = ["embedding_1", "embedding_2"],
... embedding_function = [embedding_fn, embedding_fn_2],
... embedding_data = [texts, texts],
... )

>>> # Alternative syntax for multiple embedding functions
>>> deeplake_vector_store.add(
... text = texts,
... metadata = metadatas,
... embedding_tensor_1 = (embedding_fn, texts),
... embedding_tensor_2 = (embedding_fn_2, texts),
... )

>>> # Add data to fully custom tensors
>>> deeplake_vector_store.add(
... tensor_A = [1, 2],
Expand Down Expand Up @@ -396,21 +388,18 @@ def search(
... embedding = [1, 2, 3],
... exec_option = "python",
... )

>>> # Search using an embedding function and data for embedding
>>> data = vector_store.search(
... embedding_data = "What does this chatbot do?",
... embedding_function = query_embedding_fn,
... exec_option = "compute_engine",
... )

>>> # Add a filter to your search
>>> data = vector_store.search(
... embedding = np.ones(3),
... exec_option = "python",
... filter = {"json_tensor_name": {"key": "value"}, "json_tensor_name_2": {"key_2": "value_2"}, ...}, # Only valid for exec_option = "python"
... )

>>> # Search using TQL
>>> data = vector_store.search(
... query = "select * where ..... <add TQL syntax>",
Expand Down Expand Up @@ -553,12 +542,10 @@ def delete(
Examples:
>>> # Delete using ids:
>>> data = vector_store.delete(ids)

>>> # Delete data using filter
>>> data = vector_store.delete(
... filter = {"json_tensor_name": {"key": "value"}, "json_tensor_name_2": {"key_2": "value_2"}},
... )

>>> # Delete data using TQL
>>> data = vector_store.delete(
... query = "select * where ..... <add TQL syntax>",
Expand Down Expand Up @@ -649,7 +636,6 @@ def update_embedding(
... embedding_tensor = "embedding",
... embedding_function = embedding_function,
... )

>>> # Update data using filter and several embedding_tensors, several embedding_source_tensors
>>> # and several embedding_functions:
>>> data = vector_store.update(
Expand All @@ -658,7 +644,6 @@ def update_embedding(
... filter = {"json_tensor_name": {"key": "value"}, "json_tensor_name_2": {"key_2": "value_2"}},
... embedding_tensor = ["text_embedding", "metadata_embedding"]
... )

>>> # Update data using TQL, if new embedding function is not specified the embedding_function used
>>> # during initialization will be used
>>> data = vector_store.update(
Expand Down
1 change: 1 addition & 0 deletions deeplake/core/vectorstore/test_deepmemory.py
Expand Up @@ -565,3 +565,4 @@ def test_deepmemory_search_on_local_datasets(
output = corpus.search(embedding=query_embedding, deep_memory=True, k=10)

assert correct_id in output["id"]
assert "score" in output
Expand Up @@ -4,6 +4,7 @@
from deeplake.core.vectorstore.vector_search.indra import query
from deeplake.core.vectorstore.vector_search import utils
from deeplake.core.dataset import Dataset as DeepLakeDataset
from deeplake.core.dataset.deeplake_query_dataset import DeepLakeQueryDataset
from deeplake.enterprise.util import raise_indra_installation_error


Expand Down Expand Up @@ -83,8 +84,9 @@ def search(
api.tql.prepare_deepmemory_metrics(indra_dataset)

indra_view = indra_dataset.query(tql_query)
indexes = indra_view.indexes
view = deeplake_dataset[indexes]

view = DeepLakeQueryDataset(deeplake_ds=deeplake_dataset, indra_ds=indra_view)
view._tql_query = tql_query

return_data = {}

Expand Down
158 changes: 158 additions & 0 deletions docs/source/deeplake.VectorStore.rst
@@ -0,0 +1,158 @@
deeplake.VectorStore
--------------------

.. autoclass:: deeplake.core.vectorstore.deeplake_vectorstore.VectorStore
:members:
:show-inheritance:

.. automethod:: __init__
:noindex:
:template: method

.. rubric:: Signature

.. code-block:: python

__init__(path: Union[str, pathlib.Path],
tensor_params: List[Dict[str, object]] = [
{'name': 'text', 'htype': 'text', ... },
{...},
{...},
{...}],
embedding_function: Optional[Callable] = None,
read_only: Optional[bool] = None,
ingestion_batch_size: int = 1000,
index_params: Optional[Dict[str, Union[int, str]]] = None,
num_workers: int = 0,
exec_option: str = 'auto',
token: Optional[str] = None,
overwrite: bool = False,
verbose: bool = True,
runtime: Optional[Dict] = None,
creds: Optional[Union[str, Dict]] = None,
org_id: Optional[str] = None,
logger: Logger = ...,
branch: str = 'main',
**kwargs: Any)

:param path: Path to the vector store.
:type path: Union[str, pathlib.Path]

:param tensor_params: Parameters for tensors with default configurations.
:type tensor_params: List[Dict[str, object]], optional

:param embedding_function: Function for embeddings. Default is None.
:type embedding_function: Optional[Callable], optional

:param read_only: Flag for read-only mode. Default is None.
:type read_only: Optional[bool], optional

:param ingestion_batch_size: Batch size for ingestion. Default is 1000.
:type ingestion_batch_size: int, optional

:param index_params: Parameters for indexing. Default is None.
:type index_params: Optional[Dict[str, Union[int, str]]], optional

:param num_workers: Number of workers. Default is 0.
:type num_workers: int, optional

:param exec_option: Execution option. Default is 'auto'.
:type exec_option: str, optional

:param token: Token for authentication. Default is None.
:type token: Optional[str], optional

:param overwrite: Flag to overwrite existing data. Default is False.
:type overwrite: bool, optional

:param verbose: Flag for verbose logging. Default is True.
:type verbose: bool, optional

:param runtime: Runtime configurations. Default is None.
:type runtime: Optional[Dict], optional

:param creds: Credentials for authentication. Default is None.
:type creds: Optional[Union[str, Dict]], optional

:param org_id: Organization ID. Default is None.
:type org_id: Optional[str], optional

:param logger: Logger object. Default provided.
:type logger: Logger, optional

:param branch: Branch name. Default is 'main'.
:type branch: str, optional

:param kwargs: Additional keyword arguments.
:type kwargs: Any, optional

.. automethod:: add
:noindex:
:template: method

.. rubric:: Signature

.. code-block:: python

add(embedding_function: Optional[Union[Callable, List[Callable]]] = None,
embedding_data: Optional[Union[List, List[List]]] = None,
embedding_tensor: Optional[Union[str, List[str]]] = None,
return_ids: bool = False,
rate_limiter: Dict = {'bytes_per_minute': 1800000.0, 'enabled': False},
batch_byte_size: int = 10000,
**tensors) → Optional[List[str]]

:param embedding_function: Embedding function(s). Default is None.
:type embedding_function: Optional[Union[Callable, List[Callable]]], optional

:param embedding_data: Data for embeddings. Default is None.
:type embedding_data: Optional[Union[List, List[List]]], optional

:param embedding_tensor: Name of the tensor(s) for embedding. Default is None.
:type embedding_tensor: Optional[Union[str, List[str]]], optional

:param return_ids: Flag to return IDs. Default is False.
:type return_ids: bool, optional

:param rate_limiter: Rate limiting configuration. Default provided.
:type rate_limiter: Dict, optional

:param batch_byte_size: Batch byte size. Default is 10000.
:type batch_byte_size: int, optional

:param tensors: Additional tensors.
:type tensors: Any, optional

.. automethod:: delete
:noindex:
:template: method

.. rubric:: Signature

.. code-block:: python

delete(row_ids: Optional[List[str]] = None,
ids: Optional[List[str]] = None,
filter: Optional[Union[Dict, Callable]] = None,
query: Optional[str] = None,
exec_option: Optional[str] = None,
delete_all: Optional[bool] = None) → bool

:param row_ids: Row IDs to delete. Default is None.
:type row_ids: Optional[List[str]], optional

:param ids: IDs to delete. Default is None.
:type ids: Optional[List[str]], optional

:param filter: Filter for rows to delete. Can be a dictionary or callable. Default is None.
:type filter: Optional[Union[Dict, Callable]], optional

:param query: Query to determine rows to delete. Default is None.
:type query: Optional[str], optional

:param exec_option: Execution option for deletion. Default is None.
:type exec_option: Optional[str], optional

:param delete_all: Flag to delete all entries. Default is None.
:type delete_all: Optional[bool], optional

1 change: 1 addition & 0 deletions docs/source/index.rst
Expand Up @@ -50,6 +50,7 @@ Deep Lake is an open-source database for AI.
:caption: API Reference

deeplake
deeplake.VectorStore
deeplake.core
deeplake.core.dataset
deeplake.core.tensor
Expand Down