How to insert BM25 embeddings into Milvus #32806
Unanswered
dudwo7783
asked this question in
Q&A and General discussion
Replies: 1 comment 2 replies
-
Hi @dudwo7783, the provided error message doesn't contain sufficient information to pin down the root cause. Can you provide more info? Things that might be helpful:
|
Beta Was this translation helpful? Give feedback.
2 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
I am using Milvus 2.4.1 to perform multi-vector search using BM25 and ANN (Approximate Nearest Neighbors).
I generated token-level embeddings using the BM25 embedding function.
The result of the BM25 embedding function is in the scipy.sparse._csr.csr_matrix format.
To add vectors of this format to Milvus, I added a field with the DataType.SPARSE_FLOAT_VECTOR data type to the schema and attempted to load the data into that field.
However, I encountered the following error, indicating a mismatch in the data type.
`
DataNotMatchException Traceback (most recent call last)
[... skipping hidden 1 frame]
Cell In[91], line 1
----> 1 mr = collection.insert([ids, car_types, h1s, h2s, h3s, img_urls, table_img_urls, table_contents, noun_token, embedding_cont, cons, doc_id, vectors, bm25_vectors])
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/orm/collection.py:513, in Collection.insert(self, data, partition_name, timeout, **kwargs)
512 entities = Prepare.prepare_insert_data(data, self.schema)
--> 513 return conn.batch_insert(
514 self._name,
515 entities,
516 partition_name,
517 timeout=timeout,
518 schema=self._schema_dict,
519 **kwargs,
520 )
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:147, in error_handler.<locals>.wrapper.<locals>.handler(*args, **kwargs)
146 LOGGER.error(f"RPC error: [{inner_name}], {e}, Time:{record_dict}")
--> 147 raise e from e
148 except grpc.FutureTimeoutError as e:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:143, in error_handler.<locals>.wrapper.<locals>.handler(*args, **kwargs)
142 record_dict["RPC start"] = str(datetime.datetime.now())
--> 143 return func(*args, **kwargs)
144 except MilvusException as e:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:182, in tracing_request.<locals>.wrapper.<locals>.handler(self, *args, **kwargs)
181 self.set_onetime_request_id(req_id)
--> 182 return func(self, *args, **kwargs)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:122, in retry_on_rpc_failure.<locals>.wrapper.<locals>.handler(*args, **kwargs)
121 else:
--> 122 raise e from e
123 except Exception as e:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:87, in retry_on_rpc_failure.<locals>.wrapper.<locals>.handler(*args, **kwargs)
86 try:
---> 87 return func(*args, **kwargs)
88 except grpc.RpcError as e:
89 # Do not retry on these codes
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:579, in GrpcHandler.batch_insert(self, collection_name, entities, partition_name, timeout, **kwargs)
578 return MutationFuture(None, None, err)
--> 579 raise err from err
580 else:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:562, in GrpcHandler.batch_insert(self, collection_name, entities, partition_name, timeout, **kwargs)
561 try:
--> 562 request = self._prepare_batch_insert_request(
563 collection_name, entities, partition_name, timeout, **kwargs
564 )
565 rf = self._stub.Insert.future(request, timeout=timeout)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:546, in GrpcHandler._prepare_batch_insert_request(self, collection_name, entities, partition_name, timeout, **kwargs)
541 fields_info = schema["fields"]
543 return (
544 param
545 if param
--> 546 else Prepare.batch_insert_param(collection_name, entities, partition_name, fields_info)
547 )
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/prepare.py:538, in Prepare.batch_insert_param(cls, collection_name, entities, partition_name, fields_info)
536 request = milvus_types.InsertRequest(collection_name=collection_name, partition_name=tag)
--> 538 return cls._parse_batch_request(request, entities, fields_info, location)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/prepare.py:519, in Prepare._parse_batch_request(request, entities, fields_info, location)
518 except (TypeError, ValueError) as e:
--> 519 raise DataNotMatchException(message=ExceptionsMessage.DataTypeInconsistent) from e
521 if pre_field_size == 0:
DataNotMatchException: <DataNotMatchException: (code=1, message=The Input data type is inconsistent with defined schema, please check it.)>
The above exception was the direct cause of the following exception:
DataNotMatchException Traceback (most recent call last)
[... skipping hidden 1 frame]
Cell In[91], line 1
----> 1 mr = collection.insert([ids, car_types, h1s, h2s, h3s, img_urls, table_img_urls, table_contents, noun_token, embedding_cont, cons, doc_id, vectors, bm25_vectors])
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/orm/collection.py:513, in Collection.insert(self, data, partition_name, timeout, **kwargs)
512 entities = Prepare.prepare_insert_data(data, self.schema)
--> 513 return conn.batch_insert(
514 self._name,
515 entities,
516 partition_name,
517 timeout=timeout,
518 schema=self._schema_dict,
519 **kwargs,
520 )
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:147, in error_handler.<locals>.wrapper.<locals>.handler(*args, **kwargs)
146 LOGGER.error(f"RPC error: [{inner_name}], {e}, Time:{record_dict}")
--> 147 raise e from e
148 except grpc.FutureTimeoutError as e:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:143, in error_handler.<locals>.wrapper.<locals>.handler(*args, **kwargs)
142 record_dict["RPC start"] = str(datetime.datetime.now())
--> 143 return func(*args, **kwargs)
144 except MilvusException as e:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:182, in tracing_request.<locals>.wrapper.<locals>.handler(self, *args, **kwargs)
181 self.set_onetime_request_id(req_id)
--> 182 return func(self, *args, **kwargs)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:122, in retry_on_rpc_failure.<locals>.wrapper.<locals>.handler(*args, **kwargs)
121 else:
--> 122 raise e from e
123 except Exception as e:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:87, in retry_on_rpc_failure.<locals>.wrapper.<locals>.handler(*args, **kwargs)
86 try:
---> 87 return func(*args, **kwargs)
88 except grpc.RpcError as e:
89 # Do not retry on these codes
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:579, in GrpcHandler.batch_insert(self, collection_name, entities, partition_name, timeout, **kwargs)
578 return MutationFuture(None, None, err)
--> 579 raise err from err
580 else:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:562, in GrpcHandler.batch_insert(self, collection_name, entities, partition_name, timeout, **kwargs)
561 try:
--> 562 request = self._prepare_batch_insert_request(
563 collection_name, entities, partition_name, timeout, **kwargs
564 )
565 rf = self._stub.Insert.future(request, timeout=timeout)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:546, in GrpcHandler._prepare_batch_insert_request(self, collection_name, entities, partition_name, timeout, **kwargs)
541 fields_info = schema["fields"]
543 return (
544 param
545 if param
--> 546 else Prepare.batch_insert_param(collection_name, entities, partition_name, fields_info)
547 )
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/prepare.py:538, in Prepare.batch_insert_param(cls, collection_name, entities, partition_name, fields_info)
536 request = milvus_types.InsertRequest(collection_name=collection_name, partition_name=tag)
--> 538 return cls._parse_batch_request(request, entities, fields_info, location)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/prepare.py:519, in Prepare._parse_batch_request(request, entities, fields_info, location)
518 except (TypeError, ValueError) as e:
--> 519 raise DataNotMatchException(message=ExceptionsMessage.DataTypeInconsistent) from e
521 if pre_field_size == 0:
DataNotMatchException: <DataNotMatchException: (code=1, message=The Input data type is inconsistent with defined schema, please check it.)>
The above exception was the direct cause of the following exception:
DataNotMatchException Traceback (most recent call last)
Cell In[91], line 1
----> 1 mr = collection.insert([ids, car_types, h1s, h2s, h3s, img_urls, table_img_urls, table_contents, noun_token, embedding_cont, cons, doc_id, vectors, bm25_vectors])
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/orm/collection.py:513, in Collection.insert(self, data, partition_name, timeout, **kwargs)
511 check_insert_schema(self.schema, data)
512 entities = Prepare.prepare_insert_data(data, self.schema)
--> 513 return conn.batch_insert(
514 self._name,
515 entities,
516 partition_name,
517 timeout=timeout,
518 schema=self._schema_dict,
519 **kwargs,
520 )
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:147, in error_handler.<locals>.wrapper.<locals>.handler(*args, **kwargs)
145 record_dict["RPC error"] = str(datetime.datetime.now())
146 LOGGER.error(f"RPC error: [{inner_name}], {e}, Time:{record_dict}")
--> 147 raise e from e
148 except grpc.FutureTimeoutError as e:
149 record_dict["gRPC timeout"] = str(datetime.datetime.now())
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:143, in error_handler.<locals>.wrapper.<locals>.handler(*args, **kwargs)
141 try:
142 record_dict["RPC start"] = str(datetime.datetime.now())
--> 143 return func(*args, **kwargs)
144 except MilvusException as e:
145 record_dict["RPC error"] = str(datetime.datetime.now())
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:182, in tracing_request.<locals>.wrapper.<locals>.handler(self, *args, **kwargs)
180 if req_id:
181 self.set_onetime_request_id(req_id)
--> 182 return func(self, *args, **kwargs)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:122, in retry_on_rpc_failure.<locals>.wrapper.<locals>.handler(*args, **kwargs)
120 back_off = min(back_off * back_off_multiplier, max_back_off)
121 else:
--> 122 raise e from e
123 except Exception as e:
124 raise e from e
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/decorators.py:87, in retry_on_rpc_failure.<locals>.wrapper.<locals>.handler(*args, **kwargs)
85 while True:
86 try:
---> 87 return func(*args, **kwargs)
88 except grpc.RpcError as e:
89 # Do not retry on these codes
90 if e.code() in IGNORE_RETRY_CODES:
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:579, in GrpcHandler.batch_insert(self, collection_name, entities, partition_name, timeout, **kwargs)
577 if kwargs.get("_async", False):
578 return MutationFuture(None, None, err)
--> 579 raise err from err
580 else:
581 return m
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:562, in GrpcHandler.batch_insert(self, collection_name, entities, partition_name, timeout, **kwargs)
559 raise ParamError(message="Invalid binary vector data exists")
561 try:
--> 562 request = self._prepare_batch_insert_request(
563 collection_name, entities, partition_name, timeout, **kwargs
564 )
565 rf = self._stub.Insert.future(request, timeout=timeout)
566 if kwargs.get("_async", False):
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/grpc_handler.py:546, in GrpcHandler._prepare_batch_insert_request(self, collection_name, entities, partition_name, timeout, **kwargs)
539 schema = self.describe_collection(collection_name, timeout=timeout, **kwargs)
541 fields_info = schema["fields"]
543 return (
544 param
545 if param
--> 546 else Prepare.batch_insert_param(collection_name, entities, partition_name, fields_info)
547 )
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/prepare.py:538, in Prepare.batch_insert_param(cls, collection_name, entities, partition_name, fields_info)
535 tag = partition_name if isinstance(partition_name, str) else ""
536 request = milvus_types.InsertRequest(collection_name=collection_name, partition_name=tag)
--> 538 return cls._parse_batch_request(request, entities, fields_info, location)
File ~/.pyenv/versions/myMechanic/lib/python3.8/site-packages/pymilvus/client/prepare.py:519, in Prepare._parse_batch_request(request, entities, fields_info, location)
517 request.fields_data.append(field_data)
518 except (TypeError, ValueError) as e:
--> 519 raise DataNotMatchException(message=ExceptionsMessage.DataTypeInconsistent) from e
521 if pre_field_size == 0:
522 raise ParamError(message=ExceptionsMessage.NumberRowsInvalid)
DataNotMatchException: <DataNotMatchException: (code=1, message=The Input data type is inconsistent with defined schema, please check it.)>
`
Please advise me on how I can store the BM25 embeddings in Milvus.
Beta Was this translation helpful? Give feedback.
All reactions