Skip to content

Commit

Permalink
feat: detect nested ORFs (#501)
Browse files Browse the repository at this point in the history
  • Loading branch information
TheLostLambda committed Sep 6, 2022
1 parent 8d7565b commit 19b6c36
Showing 1 changed file with 54 additions and 18 deletions.
72 changes: 54 additions & 18 deletions src/seq_analysis/orf.rs
Expand Up @@ -93,16 +93,18 @@ pub struct Orf {

/// The current algorithm state.
struct State {
start_pos: [Option<usize>; 3],
start_pos: [Vec<usize>; 3],
codon: VecDeque<u8>,
found: VecDeque<Orf>,
}

impl State {
/// Create new state.
pub fn new() -> Self {
State {
start_pos: [None, None, None],
start_pos: [Vec::new(), Vec::new(), Vec::new()],
codon: VecDeque::new(),
found: VecDeque::new(),
}
}
}
Expand All @@ -126,9 +128,13 @@ where
type Item = Orf;

fn next(&mut self) -> Option<Orf> {
let mut result: Option<Orf> = None;
let mut offset: usize;

// return any orfs already found
if !self.state.found.is_empty() {
return self.state.found.pop_front();
}

for (index, nuc) in self.seq.by_ref() {
// update the codon
if self.state.codon.len() >= 3 {
Expand All @@ -137,28 +143,34 @@ where
self.state.codon.push_back(*nuc.borrow());
offset = (index + 1) % 3;

// check if entering orf
if self.finder.start_codons.contains(&self.state.codon) {
self.state.start_pos[offset].push(index);
}
// inside orf
if self.state.start_pos[offset].is_some() {
if !self.state.start_pos[offset].is_empty() {
// check if leaving orf
if self.finder.stop_codons.contains(&self.state.codon) {
// check if length is sufficient
if index + 1 - self.state.start_pos[offset].unwrap() > self.finder.min_len {
// build results
result = Some(Orf {
start: self.state.start_pos[offset].unwrap() - 2,
end: index + 1,
offset: offset as i8,
});
for start_pos in &self.state.start_pos[offset] {
// check if length is sufficient
if index + 1 - start_pos > self.finder.min_len {
// build results
self.state.found.push_back(Orf {
start: start_pos - 2,
end: index + 1,
offset: offset as i8,
});
// if the first orf is too short, so are the others
} else {
break;
}
}
// reinitialize
self.state.start_pos[offset] = None;
self.state.start_pos[offset] = Vec::new();
}
// check if entering orf
} else if self.finder.start_codons.contains(&self.state.codon) {
self.state.start_pos[offset] = Some(index);
}
if result.is_some() {
return result;
if !self.state.found.is_empty() {
return self.state.found.pop_front();
}
}
None
Expand Down Expand Up @@ -225,4 +237,28 @@ mod tests {
];
assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>());
}

#[test]
fn test_three_nested_and_offset_orfs() {
let finder = basic_finder();
let sequence = b"ATGGGGATGGGGGGATGGAAAAATAAGTAG";
let expected = vec![
Orf {
start: 14,
end: 26,
offset: 2,
},
Orf {
start: 0,
end: 30,
offset: 0,
},
Orf {
start: 6,
end: 30,
offset: 0,
},
];
assert_eq!(expected, finder.find_all(sequence).collect::<Vec<Orf>>());
}
}

0 comments on commit 19b6c36

Please sign in to comment.