Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: base specific hop parameters in homopoly-pair-hmm #480

Merged
merged 2 commits into from Feb 23, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
98 changes: 86 additions & 12 deletions src/stats/pairhmm/homopolypairhmm.rs
Expand Up @@ -112,6 +112,16 @@ impl State {
_ => false,
}
}

fn base(&self) -> Option<u8> {
match self {
MatchA | HopAX | HopAY => Some(b'A'),
MatchC | HopCX | HopCY => Some(b'C'),
MatchG | HopGX | HopGY => Some(b'G'),
MatchT | HopTX | HopTY => Some(b'T'),
_ => None,
}
}
}

const STATES: [State; 14] = [
Expand Down Expand Up @@ -165,6 +175,39 @@ pub trait HopParameters {
fn prob_hop_y_extend(&self) -> LogProb;
}

/// Trait for parametrization of `PairHMM` hop behavior.
pub trait BaseSpecificHopParameters {
/// Probability to start hop in x.
fn prob_hop_x_with_base(&self, base: u8) -> LogProb;

/// Probability to start hop in y.
fn prob_hop_y_with_base(&self, base: u8) -> LogProb;

/// Probability to extend hop in x.
fn prob_hop_x_extend_with_base(&self, base: u8) -> LogProb;

/// Probability to extend hop in y.
fn prob_hop_y_extend_with_base(&self, base: u8) -> LogProb;
}

impl<H: HopParameters> BaseSpecificHopParameters for H {
fn prob_hop_x_with_base(&self, _base: u8) -> LogProb {
self.prob_hop_x()
}

fn prob_hop_y_with_base(&self, _base: u8) -> LogProb {
self.prob_hop_y()
}

fn prob_hop_x_extend_with_base(&self, _base: u8) -> LogProb {
self.prob_hop_x_extend()
}

fn prob_hop_y_extend_with_base(&self, _base: u8) -> LogProb {
self.prob_hop_y_extend()
}
}

/// A pair Hidden Markov Model for comparing sequences x and y as described by
/// Durbin, R., Eddy, S., Krogh, A., & Mitchison, G. (1998). Biological Sequence Analysis.
/// Current Topics in Genome Analysis 2008. http://doi.org/10.1017/CBO9780511790492.
Expand Down Expand Up @@ -456,41 +499,72 @@ const MATCH_OTHER: [(State, State); 12] = [
(MatchT, MatchA),
];

fn build_transition_table<G: GapParameters, H: HopParameters>(
fn build_transition_table<G: GapParameters, H: BaseSpecificHopParameters>(
gap_params: &G,
hop_params: &H,
) -> HashMap<usize, LogProb> {
let mut transition_probs = HashMap::new();

let prob_hop_x = hop_params.prob_hop_x();
let prob_hop_y = hop_params.prob_hop_y();
let prob_hop_x_extend = hop_params.prob_hop_x_extend();
let prob_hop_y_extend = hop_params.prob_hop_y_extend();

let prob_gap_x = gap_params.prob_gap_x();
let prob_gap_y = gap_params.prob_gap_y();
let prob_gap_x_extend = gap_params.prob_gap_x_extend();
let prob_gap_y_extend = gap_params.prob_gap_y_extend();

MATCH_HOP_X.iter().for_each(|(a, b)| {
transition_probs.insert(*a >> *b, prob_hop_x);
transition_probs.insert(
*a >> *b,
hop_params.prob_hop_x_with_base(b.base().expect("Unsupported base")),
);
});
MATCH_HOP_Y.iter().for_each(|(a, b)| {
transition_probs.insert(*a >> *b, prob_hop_y);
transition_probs.insert(
*a >> *b,
hop_params.prob_hop_y_with_base(b.base().expect("Unsupported base")),
);
});
HOP_X_HOP_X.iter().for_each(|(a, b)| {
transition_probs.insert(*a >> *b, prob_hop_x_extend);
assert_eq!(a.base(), b.base());
transition_probs.insert(
*a >> *b,
hop_params.prob_hop_x_extend_with_base(b.base().expect("Unsupported base")),
);
});
HOP_Y_HOP_Y.iter().for_each(|(a, b)| {
transition_probs.insert(*a >> *b, prob_hop_y_extend);
assert_eq!(a.base(), b.base());
transition_probs.insert(
*a >> *b,
hop_params.prob_hop_y_extend_with_base(b.base().expect("Unsupported base")),
);
});
HOP_X_MATCH.iter().for_each(|(a, b)| {
transition_probs.insert(*a >> *b, prob_hop_x_extend.ln_one_minus_exp());
transition_probs.insert(
*a >> *b,
hop_params
.prob_hop_x_with_base(a.base().expect("Unsupported base"))
.ln_one_minus_exp(),
);
});
HOP_Y_MATCH.iter().for_each(|(a, b)| {
transition_probs.insert(*a >> *b, prob_hop_y_extend.ln_one_minus_exp());
transition_probs.insert(
*a >> *b,
hop_params
.prob_hop_y_with_base(a.base().expect("Unsupported base"))
.ln_one_minus_exp(),
);
});

let prob_hop_x = LogProb::ln_sum_exp(&[
hop_params.prob_hop_x_with_base(b'A'),
hop_params.prob_hop_x_with_base(b'C'),
hop_params.prob_hop_x_with_base(b'G'),
hop_params.prob_hop_x_with_base(b'T'),
]) - LogProb(4.0);
let prob_hop_y = LogProb::ln_sum_exp(&[
hop_params.prob_hop_y_with_base(b'A'),
hop_params.prob_hop_y_with_base(b'C'),
hop_params.prob_hop_y_with_base(b'G'),
hop_params.prob_hop_y_with_base(b'T'),
]) - LogProb(4.0);
let match_same =
LogProb::ln_sum_exp(&[prob_gap_y, prob_gap_x, prob_hop_x, prob_hop_y]).ln_one_minus_exp();
let match_other =
Expand Down