From cf75b6cb5280dde52b26f95f8ec9cd37706a642d Mon Sep 17 00:00:00 2001 From: Till Hartmann Date: Wed, 23 Feb 2022 16:36:39 +0100 Subject: [PATCH] feat!: base specific hop parameters in homopoly-pair-hmm (#480) * base specific HopParameters * mark unused params with underscore --- src/stats/pairhmm/homopolypairhmm.rs | 98 ++++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 12 deletions(-) diff --git a/src/stats/pairhmm/homopolypairhmm.rs b/src/stats/pairhmm/homopolypairhmm.rs index ef4efd82b..7f91a5742 100644 --- a/src/stats/pairhmm/homopolypairhmm.rs +++ b/src/stats/pairhmm/homopolypairhmm.rs @@ -112,6 +112,16 @@ impl State { _ => false, } } + + fn base(&self) -> Option { + match self { + MatchA | HopAX | HopAY => Some(b'A'), + MatchC | HopCX | HopCY => Some(b'C'), + MatchG | HopGX | HopGY => Some(b'G'), + MatchT | HopTX | HopTY => Some(b'T'), + _ => None, + } + } } const STATES: [State; 14] = [ @@ -165,6 +175,39 @@ pub trait HopParameters { fn prob_hop_y_extend(&self) -> LogProb; } +/// Trait for parametrization of `PairHMM` hop behavior. +pub trait BaseSpecificHopParameters { + /// Probability to start hop in x. + fn prob_hop_x_with_base(&self, base: u8) -> LogProb; + + /// Probability to start hop in y. + fn prob_hop_y_with_base(&self, base: u8) -> LogProb; + + /// Probability to extend hop in x. + fn prob_hop_x_extend_with_base(&self, base: u8) -> LogProb; + + /// Probability to extend hop in y. + fn prob_hop_y_extend_with_base(&self, base: u8) -> LogProb; +} + +impl BaseSpecificHopParameters for H { + fn prob_hop_x_with_base(&self, _base: u8) -> LogProb { + self.prob_hop_x() + } + + fn prob_hop_y_with_base(&self, _base: u8) -> LogProb { + self.prob_hop_y() + } + + fn prob_hop_x_extend_with_base(&self, _base: u8) -> LogProb { + self.prob_hop_x_extend() + } + + fn prob_hop_y_extend_with_base(&self, _base: u8) -> LogProb { + self.prob_hop_y_extend() + } +} + /// A pair Hidden Markov Model for comparing sequences x and y as described by /// Durbin, R., Eddy, S., Krogh, A., & Mitchison, G. (1998). Biological Sequence Analysis. /// Current Topics in Genome Analysis 2008. http://doi.org/10.1017/CBO9780511790492. @@ -456,41 +499,72 @@ const MATCH_OTHER: [(State, State); 12] = [ (MatchT, MatchA), ]; -fn build_transition_table( +fn build_transition_table( gap_params: &G, hop_params: &H, ) -> HashMap { let mut transition_probs = HashMap::new(); - let prob_hop_x = hop_params.prob_hop_x(); - let prob_hop_y = hop_params.prob_hop_y(); - let prob_hop_x_extend = hop_params.prob_hop_x_extend(); - let prob_hop_y_extend = hop_params.prob_hop_y_extend(); - let prob_gap_x = gap_params.prob_gap_x(); let prob_gap_y = gap_params.prob_gap_y(); let prob_gap_x_extend = gap_params.prob_gap_x_extend(); let prob_gap_y_extend = gap_params.prob_gap_y_extend(); MATCH_HOP_X.iter().for_each(|(a, b)| { - transition_probs.insert(*a >> *b, prob_hop_x); + transition_probs.insert( + *a >> *b, + hop_params.prob_hop_x_with_base(b.base().expect("Unsupported base")), + ); }); MATCH_HOP_Y.iter().for_each(|(a, b)| { - transition_probs.insert(*a >> *b, prob_hop_y); + transition_probs.insert( + *a >> *b, + hop_params.prob_hop_y_with_base(b.base().expect("Unsupported base")), + ); }); HOP_X_HOP_X.iter().for_each(|(a, b)| { - transition_probs.insert(*a >> *b, prob_hop_x_extend); + assert_eq!(a.base(), b.base()); + transition_probs.insert( + *a >> *b, + hop_params.prob_hop_x_extend_with_base(b.base().expect("Unsupported base")), + ); }); HOP_Y_HOP_Y.iter().for_each(|(a, b)| { - transition_probs.insert(*a >> *b, prob_hop_y_extend); + assert_eq!(a.base(), b.base()); + transition_probs.insert( + *a >> *b, + hop_params.prob_hop_y_extend_with_base(b.base().expect("Unsupported base")), + ); }); HOP_X_MATCH.iter().for_each(|(a, b)| { - transition_probs.insert(*a >> *b, prob_hop_x_extend.ln_one_minus_exp()); + transition_probs.insert( + *a >> *b, + hop_params + .prob_hop_x_with_base(a.base().expect("Unsupported base")) + .ln_one_minus_exp(), + ); }); HOP_Y_MATCH.iter().for_each(|(a, b)| { - transition_probs.insert(*a >> *b, prob_hop_y_extend.ln_one_minus_exp()); + transition_probs.insert( + *a >> *b, + hop_params + .prob_hop_y_with_base(a.base().expect("Unsupported base")) + .ln_one_minus_exp(), + ); }); + let prob_hop_x = LogProb::ln_sum_exp(&[ + hop_params.prob_hop_x_with_base(b'A'), + hop_params.prob_hop_x_with_base(b'C'), + hop_params.prob_hop_x_with_base(b'G'), + hop_params.prob_hop_x_with_base(b'T'), + ]) - LogProb(4.0); + let prob_hop_y = LogProb::ln_sum_exp(&[ + hop_params.prob_hop_y_with_base(b'A'), + hop_params.prob_hop_y_with_base(b'C'), + hop_params.prob_hop_y_with_base(b'G'), + hop_params.prob_hop_y_with_base(b'T'), + ]) - LogProb(4.0); let match_same = LogProb::ln_sum_exp(&[prob_gap_y, prob_gap_x, prob_hop_x, prob_hop_y]).ln_one_minus_exp(); let match_other =