From 3489e6ab493b5191b879b4405b868a4004bbb88e Mon Sep 17 00:00:00 2001
From: Christian Heide <11767963+jch-13@users.noreply.github.com>
Date: Mon, 4 Oct 2021 09:23:50 +0200
Subject: [PATCH] fix: backward search yielding potentially incorrect positions on FM-Index (#454) (#455)

---
 src/data_structures/fmindex.rs | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/data_structures/fmindex.rs b/src/data_structures/fmindex.rs
index 124f5a898..370c9fc18 100644
--- a/src/data_structures/fmindex.rs
+++ b/src/data_structures/fmindex.rs
@@ -127,7 +127,7 @@ pub trait FMIndexable {
 
             // The symbol was not found if we end up with an empty interval.
             // Terminate the LF-mapping process.
-            if l == r {
+            if l > r {
                 break;
             }
         }
@@ -540,6 +540,24 @@ mod tests {
         assert_eq!(positions, []);
     }
 
+    #[test]
+    fn test_fmindex_backward_search_optimization() {
+        let text = b"GATTACA$";
+        let pattern = &text[..text.len() - 1];
+        let alphabet = dna::n_alphabet();
+        let sa = suffix_array(text);
+        let bwt = bwt(text, &sa);
+        let less = less(&bwt, &alphabet);
+        let occ = Occ::new(&bwt, 3, &alphabet);
+        let fm = FMIndex::new(&bwt, &less, &occ);
+
+        let sai = fm.backward_search(pattern.iter());
+
+        let positions = sai.occ(&sa);
+
+        assert_eq!(positions, [0]);
+    }
+
     #[test]
     fn test_smems() {
         let orig_text = b"GCCTTAACAT";