Skip to content

Commit

Permalink
[2.8] Fix deadlock in vecsim hybrid query with timeout [MOD-6510, MOD…
Browse files Browse the repository at this point in the history
…-6244] (#4498)

* fix the deadlock + add test

(cherry picked from commit c4e8c93)

* formatting

(cherry picked from commit 7f531b7)

* formatting2

(cherry picked from commit d22e5d4)

* formatting3

(cherry picked from commit 2cffb86)

* formatting4

(cherry picked from commit b4622ec)

* formatting5

(cherry picked from commit 26d3b34)

* formatting6

(cherry picked from commit 0fbdbe1)

* formatting7

(cherry picked from commit 1c8a540)

---------

Co-authored-by: alon <alonreshef24@gmail.com>
  • Loading branch information
github-actions[bot] and alonre24 committed Mar 1, 2024
1 parent 32fdaca commit 86206d4
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
5 changes: 2 additions & 3 deletions src/hybrid_reader.c
Expand Up @@ -176,7 +176,6 @@ static VecSimQueryReply_Code computeDistances(HybridIterator *hr) {
while (hr->child->Read(hr->child->ctx, &cur_child_res) != INDEXREAD_EOF) {
if (TimedOut_WithCtx(&hr->timeoutCtx)) {
rc = VecSim_QueryReply_TimedOut;
VecSimTieredIndex_ReleaseSharedLocks(hr->index);
break;
}
double metric = VecSimIndex_GetDistanceFrom_Unsafe(hr->index, cur_child_res->docId, qvector);
Expand Down Expand Up @@ -280,12 +279,12 @@ static VecSimQueryReply_Code prepareResults(HybridIterator *hr) {

if (reviewHybridSearchPolicy(hr, n_res_left, child_num_estimated, &child_num_estimated)) {
// Change policy from batches to AD-HOC BF.
VecSimBatchIterator_Free(batch_it);
hr->searchMode = VECSIM_HYBRID_BATCHES_TO_ADHOC_BF;
// Clean the saved results, and restart the hybrid search in ad-hoc BF mode.
mmh_clear(hr->topResults);
hr->child->Rewind(hr->child->ctx);
code = computeDistances(hr);
break;
return computeDistances(hr);
}
}
VecSimBatchIterator_Free(batch_it);
Expand Down
19 changes: 19 additions & 0 deletions tests/pytests/test_issues.py
Expand Up @@ -1019,3 +1019,22 @@ def test_mod6186(env):
env.expect('FT.EXPLAINCLI idx *abc').equal(['SUFFIX{*abc}', ''])
env.expect('FT.EXPLAINCLI idx *abc*').equal(['INFIX{*abc*}', ''])

@skip(cluster=True)
def test_mod6510_vecsim_hybrid_adhoc_timeout(env):
dim = 1000
n_vectors = 50000
env.expect(config_cmd(), 'set', 'ON_TIMEOUT', 'FAIL').ok()

# Create HNSW index which is large enough, so we'll get timeout later on.
env.expect(f'FT.CREATE idx SCHEMA v VECTOR HNSW 10 DIM {dim} DISTANCE_METRIC L2 TYPE FLOAT32 M 2 EF_CONSTRUCTION 5'
f' t text').equal('OK')
for i in range(n_vectors):
env.expect('HSET', i, 'v', create_np_array_typed(np.random.rand(dim)).tobytes(), 't', f'meta data').equal(2)

# There was a bug causing this to unlock the tiered HNSW index locks twice (in case of timeout + adhoc BF mode).
query_vec = create_np_array_typed(np.random.rand(dim))
env.expect('FT.SEARCH', 'idx', 'meta=>[KNN 5 @v $vec_param HYBRID_POLICY ADHOC_BF]', 'NOCONTENT',
'PARAMS', 2, 'vec_param', query_vec.tobytes(), 'TIMEOUT', 1, 'DIALECT', 2)\
.error().contains('Timeout limit was reached')
# Then, when we delete inplace and tried to acquire the locks again for write, we got a deadlock.
env.expect('DEL 0').equal(1)

0 comments on commit 86206d4

Please sign in to comment.