New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: detect nested ORFs #501
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -93,16 +93,18 @@ pub struct Orf { | |
|
||
/// The current algorithm state. | ||
struct State { | ||
start_pos: [Option<usize>; 3], | ||
start_pos: [Vec<usize>; 3], | ||
codon: VecDeque<u8>, | ||
found: VecDeque<Orf>, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If more than one ORF is found after |
||
} | ||
|
||
impl State { | ||
/// Create new state. | ||
pub fn new() -> Self { | ||
State { | ||
start_pos: [None, None, None], | ||
start_pos: [Vec::new(), Vec::new(), Vec::new()], | ||
codon: VecDeque::new(), | ||
found: VecDeque::new(), | ||
} | ||
} | ||
} | ||
|
@@ -126,9 +128,13 @@ where | |
type Item = Orf; | ||
|
||
fn next(&mut self) -> Option<Orf> { | ||
let mut result: Option<Orf> = None; | ||
let mut offset: usize; | ||
|
||
// return any orfs already found | ||
if !self.state.found.is_empty() { | ||
return self.state.found.pop_front(); | ||
} | ||
|
||
Comment on lines
+133
to
+137
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there are already found ORFs than haven't been reported yet, then this queue should be exhausted before moving back to scanning |
||
for (index, nuc) in self.seq.by_ref() { | ||
// update the codon | ||
if self.state.codon.len() >= 3 { | ||
|
@@ -137,28 +143,34 @@ where | |
self.state.codon.push_back(*nuc.borrow()); | ||
offset = (index + 1) % 3; | ||
|
||
// check if entering orf | ||
if self.finder.start_codons.contains(&self.state.codon) { | ||
self.state.start_pos[offset].push(index); | ||
} | ||
Comment on lines
+146
to
+149
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No longer can be an |
||
// inside orf | ||
if self.state.start_pos[offset].is_some() { | ||
if !self.state.start_pos[offset].is_empty() { | ||
// check if leaving orf | ||
if self.finder.stop_codons.contains(&self.state.codon) { | ||
// check if length is sufficient | ||
if index + 1 - self.state.start_pos[offset].unwrap() > self.finder.min_len { | ||
// build results | ||
result = Some(Orf { | ||
start: self.state.start_pos[offset].unwrap() - 2, | ||
end: index + 1, | ||
offset: offset as i8, | ||
}); | ||
for start_pos in &self.state.start_pos[offset] { | ||
// check if length is sufficient | ||
if index + 1 - start_pos > self.finder.min_len { | ||
// build results | ||
self.state.found.push_back(Orf { | ||
start: start_pos - 2, | ||
end: index + 1, | ||
offset: offset as i8, | ||
}); | ||
// if the first orf is too short, so are the others | ||
} else { | ||
break; | ||
} | ||
Comment on lines
+154
to
+166
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When a stop codon is found, loop through and queue all of the ORFs that can be constructed from the different start sites. Since these start sites are ordered from first to last (as they appear in the sequence), we can assume that if the ORF constructed from one start site is too small, then all of the subsequent start sites will yield an even smaller, also invalid ORF. To save work in these cases, we break early from the loop. |
||
} | ||
// reinitialize | ||
self.state.start_pos[offset] = None; | ||
self.state.start_pos[offset] = Vec::new(); | ||
} | ||
// check if entering orf | ||
} else if self.finder.start_codons.contains(&self.state.codon) { | ||
self.state.start_pos[offset] = Some(index); | ||
} | ||
if result.is_some() { | ||
return result; | ||
if !self.state.found.is_empty() { | ||
return self.state.found.pop_front(); | ||
} | ||
} | ||
None | ||
|
@@ -225,4 +237,28 @@ mod tests { | |
]; | ||
assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>()); | ||
} | ||
|
||
#[test] | ||
fn test_three_nested_and_offset_orfs() { | ||
let finder = basic_finder(); | ||
let sequence = b"ATGGGGATGGGGGGATGGAAAAATAAGTAG"; | ||
let expected = vec![ | ||
Orf { | ||
start: 14, | ||
end: 26, | ||
offset: 2, | ||
}, | ||
Orf { | ||
start: 0, | ||
end: 30, | ||
offset: 0, | ||
}, | ||
Orf { | ||
start: 6, | ||
end: 30, | ||
offset: 0, | ||
}, | ||
]; | ||
assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>()); | ||
} | ||
Comment on lines
+240
to
+263
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tests that nesting is detected and doesn't break scanning in other reading frames |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since there can now be 0 or more active start positions (instead of just 0 or 1), we move to
Vec
instead ofOption