Skip to content

Commit

Permalink
Changed gamma distribution to dirichlet distribution fir continuous w…
Browse files Browse the repository at this point in the history
…eights
  • Loading branch information
fredericlemoine committed Nov 7, 2016
1 parent d782b48 commit 51fa6c6
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 16 deletions.
14 changes: 9 additions & 5 deletions cmd/distboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ var distbootSeed int64
var distbootOutput string
var distbootnb int
var distbootmodel string
var distbootgamma bool
var distbootcontinuous bool
var distboolRemoveGaps bool

// distbootCmd represents the distboot command
Expand All @@ -35,7 +35,11 @@ Available Distances:
For example:
goalign build distboot -m k2p -i align.fa -o mats.txt`,
goalign build distboot -m k2p -i align.fa -o mats.txt
If -c is given, then random continuous weights are associated to all sites.
Weights follow a Dirichlet distribution D(n;1,...,1)
`,

Run: func(cmd *cobra.Command, args []string) {
rand.Seed(distbootSeed)
Expand All @@ -45,8 +49,8 @@ goalign build distboot -m k2p -i align.fa -o mats.txt`,
model := distance.Model(distbootmodel, distboolRemoveGaps)
for i := 0; i < distbootnb; i++ {
var weights []float64 = nil
if distbootgamma {
weights = distance.BuildWeights(align)
if distbootcontinuous {
weights = distance.BuildWeightsDirichlet(align)
distMatrix := distance.DistMatrix(align, weights, model, rootcpus)
writeDistBootMatrix(distMatrix, f)
} else {
Expand All @@ -65,7 +69,7 @@ func init() {
distbootCmd.PersistentFlags().StringVarP(&distbootOutput, "output", "o", "stdout", "Distance matrices output file")
distbootCmd.PersistentFlags().StringVarP(&distbootmodel, "model", "m", "k2p", "Model for distance computation")
distbootCmd.PersistentFlags().IntVarP(&distbootnb, "nboot", "n", 1, "Number of bootstrap replicates to build")
distbootCmd.PersistentFlags().BoolVarP(&distbootgamma, "gamma", "g", false, "Bootstraps are done by weighting alignment using gamma generated weights")
distbootCmd.PersistentFlags().BoolVarP(&distbootcontinuous, "continuous", "c", false, "Bootstraps are done by weighting alignment with continuous weights (dirichlet)")
distbootCmd.PersistentFlags().BoolVarP(&distboolRemoveGaps, "rm-gaps", "r", false, "Do not take into account positions containing >=1 gaps")
}

Expand Down
10 changes: 7 additions & 3 deletions cmd/tntweightboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@ import (
// tntweightbootCmd represents the tntweightboot command
var tntweightbootCmd = &cobra.Command{
Use: "tntweightboot",
Short: "",
Long: ``,
Short: "Generates a data file to give as TNT input, with continuous weight for sites",
Long: `Generates a data file to give as TNT input, with continuous weight for sites
Weights follow a Dirichlet distribution D(n;1,...,1)
`,
Run: func(cmd *cobra.Command, args []string) {
rand.Seed(weightbootSeed)

Expand All @@ -29,7 +33,7 @@ var tntweightbootCmd = &cobra.Command{
f = openWriteFile(weightbootOutput)
}
var weights []float64 = nil
weights = distance.BuildWeights(al)
weights = distance.BuildWeightsDirichlet(al)
minw := -1.0
maxw := -1.0
for _, w := range weights {
Expand Down
10 changes: 6 additions & 4 deletions cmd/weightboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ var weightbootnb int
// weightbootCmd represents the weightboot command
var weightbootCmd = &cobra.Command{
Use: "weightboot",
Short: "generate weights for all positions of the input alignment",
Long: `generate weights for all positions of the input alignment
Short: "generate continous weights for all positions of the input alignment",
Long: `generate continous weights for all positions of the input alignment
If the input alignment contains several alignments, will process the first one only
If the input alignment contains several alignments, will process the first one only.
Weights follow a Dirichlet distribtion D(n;1,...,1)
`,
Run: func(cmd *cobra.Command, args []string) {
Expand All @@ -29,7 +31,7 @@ If the input alignment contains several alignments, will process the first one o
f := openWriteFile(weightbootOutput)
for i := 0; i < weightbootnb; i++ {
var weights []float64 = nil
weights = distance.BuildWeights(al)
weights = distance.BuildWeightsDirichlet(al)
for i, w := range weights {
if i > 0 {
f.WriteString("\t")
Expand Down
16 changes: 14 additions & 2 deletions distance/distance.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ func Model(modelType string, removegaps bool) DistModel {
return model
}

/* Return a normalized vector of weights */
func BuildWeights(al align.Alignment) []float64 {
/* Return a normalized vector of weights following a Gamma distribution*/
func BuildWeightsGamma(al align.Alignment) []float64 {
outweights := make([]float64, al.Length())
// Parameters of the binomial
n := float64(al.Length())
Expand All @@ -76,6 +76,18 @@ func BuildWeights(al align.Alignment) []float64 {
return outweights
}

/* Returns a vector of weights following a Dirichlet distribution D(n ; 1,...,1)
with n alignment length
*/
func BuildWeightsDirichlet(al align.Alignment) []float64 {
alpha := make([]float64, al.Length(), al.Length())
for i := 0; i < al.Length(); i++ {
alpha[i] = 1
}
outweights := stats.Dirichlet(float64(al.Length()), alpha...)
return outweights
}

/* Compute a matrix distance, with weights associated to each alignment positions */
/* If weights == nil, then all weights are considered 1 */
func DistMatrix(al align.Alignment, weights []float64, model DistModel, cpus int) [][]float64 {
Expand Down
5 changes: 3 additions & 2 deletions stats/dirichlet.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (
// Dirichlet returns a set of random numbers from dirichlet distribution
// See https://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation
// For further information
func Dirichlet(alpha ...float64) []float64 {
// Factor: multiplying factor to apply to all values : Normally it is 1
func Dirichlet(factor float64, alpha ...float64) []float64 {
if len(alpha) <= 2 {
io.ExitWithMessage(errors.New(fmt.Sprintf("Alpha parameter vector for Dirichlet sample contains less than 2 values")))
}
Expand All @@ -23,7 +24,7 @@ func Dirichlet(alpha ...float64) []float64 {
sum += sample[i]
}
for i, _ := range alpha {
sample[i] = sample[i] / sum
sample[i] = factor * sample[i] / sum
}
return sample
}

0 comments on commit 51fa6c6

Please sign in to comment.