diff --git a/src/seq_analysis/orf.rs b/src/seq_analysis/orf.rs index 5f0418a3c..14db709bb 100644 --- a/src/seq_analysis/orf.rs +++ b/src/seq_analysis/orf.rs @@ -93,16 +93,18 @@ pub struct Orf { /// The current algorithm state. struct State { - start_pos: [Option<usize>; 3], + start_pos: [Vec<usize>; 3], codon: VecDeque<u8>, + found: VecDeque<Orf>, } impl State { /// Create new state. pub fn new() -> Self { State { - start_pos: [None, None, None], + start_pos: [Vec::new(), Vec::new(), Vec::new()], codon: VecDeque::new(), + found: VecDeque::new(), } } } @@ -126,9 +128,13 @@ where type Item = Orf; fn next(&mut self) -> Option<Orf> { - let mut result: Option<Orf> = None; let mut offset: usize; + // return any orfs already found + if !self.state.found.is_empty() { + return self.state.found.pop_front(); + } + for (index, nuc) in self.seq.by_ref() { // update the codon if self.state.codon.len() >= 3 { @@ -137,28 +143,34 @@ where self.state.codon.push_back(*nuc.borrow()); offset = (index + 1) % 3; + // check if entering orf + if self.finder.start_codons.contains(&self.state.codon) { + self.state.start_pos[offset].push(index); + } // inside orf - if self.state.start_pos[offset].is_some() { + if !self.state.start_pos[offset].is_empty() { // check if leaving orf if self.finder.stop_codons.contains(&self.state.codon) { - // check if length is sufficient - if index + 1 - self.state.start_pos[offset].unwrap() > self.finder.min_len { - // build results - result = Some(Orf { - start: self.state.start_pos[offset].unwrap() - 2, - end: index + 1, - offset: offset as i8, - }); + for start_pos in &self.state.start_pos[offset] { + // check if length is sufficient + if index + 1 - start_pos > self.finder.min_len { + // build results + self.state.found.push_back(Orf { + start: start_pos - 2, + end: index + 1, + offset: offset as i8, + }); + // if the first orf is too short, so are the others + } else { + break; + } } // reinitialize - self.state.start_pos[offset] = None; + self.state.start_pos[offset] = Vec::new(); } - 
// check if entering orf - } else if self.finder.start_codons.contains(&self.state.codon) { - self.state.start_pos[offset] = Some(index); } - if result.is_some() { - return result; + if !self.state.found.is_empty() { + return self.state.found.pop_front(); } } None @@ -225,4 +237,28 @@ mod tests { ]; assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>()); } + + #[test] + fn test_three_nested_and_offset_orfs() { + let finder = basic_finder(); + let sequence = b"ATGGGGATGGGGGGATGGAAAAATAAGTAG"; + let expected = vec![ + Orf { + start: 14, + end: 26, + offset: 2, + }, + Orf { + start: 0, + end: 30, + offset: 0, + }, + Orf { + start: 6, + end: 30, + offset: 0, + }, + ]; + assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>()); + } }