Skip to content

Commit

Permalink
Really fix the regression in flat search cutoff.
Browse files (browse the repository at this point in the history)
  • Loading branch information
kishorenc committed Apr 26, 2024
1 parent 397518c commit 58c8198
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
6 changes: 5 additions & 1 deletion src/index.cpp
Expand Up @@ -3069,6 +3069,10 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
group_by_field_it_vec = get_group_by_field_iterators(group_by_fields);
}

std::sort(dist_results.begin(), dist_results.end(), [](const auto& p1, const auto& p2) {
return p1.first < p2.first;
});

for (auto& dist_result : dist_results) {
auto& seq_id = dist_result.second.seq_id;
auto references = std::move(dist_result.second.reference_filter_results);
Expand All @@ -3077,7 +3081,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
continue;
}

if(vector_query.query_doc_given && nearest_ids.size() >= k-1) {
if(vector_query.query_doc_given && nearest_ids.size() == k-1) {
// When id based vector query is made, we ask for K+1 results to account for the query
// record itself being returned. However, when the filter clause does not match the
// query record, we will end up returning 1 extra hit.
Expand Down
24 changes: 22 additions & 2 deletions test/collection_vector_search_test.cpp
Expand Up @@ -768,7 +768,7 @@ TEST_F(CollectionVectorTest, VecSearchWithFiltering) {
ASSERT_EQ(10, results["hits"].size());

// with points:<10, flat-search
results = coll1->search("*", {}, "points:<10", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
results = coll1->search("*", {}, "points:<10", {}, {}, {0}, 3, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
Expand All @@ -778,13 +778,33 @@ TEST_F(CollectionVectorTest, VecSearchWithFiltering) {
false, true, "vec:([0.96826, 0.94, 0.39557, 0.306488], flat_search_cutoff: 1000)").get();

ASSERT_EQ(10, results["found"].get<size_t>());
ASSERT_EQ(10, results["hits"].size());
ASSERT_EQ(3, results["hits"].size());
ASSERT_FLOAT_EQ(3.409385e-05, results["hits"][0]["vector_distance"].get<float>());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());

ASSERT_FLOAT_EQ(0.016780376, results["hits"][1]["vector_distance"].get<float>());
ASSERT_EQ("5", results["hits"][1]["document"]["id"].get<std::string>());

results = coll1->search("*", {}, "points:<10", {}, {}, {0}, 3, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7,
fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([], id: 3, flat_search_cutoff: 1000)").get();

ASSERT_EQ(3, results["hits"].size());

LOG(INFO) << results["hits"][0];
LOG(INFO) << results["hits"][1];

ASSERT_EQ("9", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_FLOAT_EQ(0.050603985, results["hits"][0]["vector_distance"].get<float>());

ASSERT_EQ("5", results["hits"][1]["document"]["id"].get<std::string>());
ASSERT_FLOAT_EQ(0.100155532, results["hits"][1]["vector_distance"].get<float>());

// single point

results = coll1->search("*", {}, "points:1", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
Expand Down

0 comments on commit 58c8198

Please sign in to comment.