Skip to content

Commit

Permalink
Added addid and random commands
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Jan 26, 2017
1 parent 4b38c88 commit 8747ac7
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 0 deletions.
32 changes: 32 additions & 0 deletions align/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ const (

)

// stdaminoacid holds the 20 one-letter amino acid codes from which random
// amino acid sequences are drawn.
var stdaminoacid = []rune{'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'}

// stdnucleotides holds the 4 one-letter nucleotide codes from which random
// nucleotide sequences are drawn.
var stdnucleotides = []rune{'A', 'C', 'G', 'T'}

type Alignment interface {
AddSequence(name string, sequence string, comment string) error
AddSequenceChar(name string, sequence []rune, comment string) error
Expand All @@ -34,6 +37,7 @@ type Alignment interface {
Recombine(rate float64, lenprop float64)
TrimNames(size int) (map[string]string, error)
TrimSequences(trimsize int, fromStart bool) error
AppendSeqIdentifier(identifier string, right bool)
CharStats() map[rune]int64
Alphabet() int
}
Expand Down Expand Up @@ -306,6 +310,21 @@ func (a *align) TrimSequences(trimsize int, fromStart bool) error {
return nil
}

// AppendSeqIdentifier adds identifier to the name of every sequence of the
// alignment. When right is true the identifier is placed after the existing
// name, otherwise it is placed before it. An empty identifier leaves all
// names untouched.
//
// NOTE(review): only the name field of each sequence is rewritten here; if
// names are also indexed elsewhere in the alignment, confirm that index stays
// in sync.
func (a *align) AppendSeqIdentifier(identifier string, right bool) {
	if identifier == "" {
		return
	}
	for _, s := range a.seqs {
		if !right {
			s.name = identifier + s.name
			continue
		}
		s.name += identifier
	}
}

// Samples randomly a subset of the sequences
// And returns this new alignment
// If nb < 1 or nb > nbsequences returns nil and an error
Expand Down Expand Up @@ -388,3 +407,16 @@ func (a *align) CharStats() map[rune]int64 {
// Alphabet returns the alphabet code stored in the alignment.
func (a *align) Alphabet() int {
return a.alphabet
}

// RandomAlignment builds an alignment of nbseq random sequences, each of the
// given length, drawn from the given alphabet.
//
// Sequences are named "Seq0000", "Seq0001", ... following their creation
// order. It returns a nil alignment and an error as soon as a sequence cannot
// be generated (see RandomSequence) or cannot be added to the alignment.
func RandomAlignment(alphabet, length, nbseq int) (Alignment, error) {
	al := NewAlign(alphabet)
	for i := 0; i < nbseq; i++ {
		chars, err := RandomSequence(alphabet, length)
		if err != nil {
			return nil, err
		}
		name := fmt.Sprintf("Seq%04d", i)
		// Report insertion failures instead of silently returning an
		// alignment that misses sequences.
		if err := al.AddSequenceChar(name, chars, ""); err != nil {
			return nil, err
		}
	}
	return al, nil
}
45 changes: 45 additions & 0 deletions align/align_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package align

import (
"fmt"
"strings"
"testing"
)

// TestRandomAlignment checks that RandomAlignment returns an alignment with
// the requested sequence length and the requested number of sequences.
func TestRandomAlignment(t *testing.T) {
	length := 3000
	nbseqs := 500
	a, err := RandomAlignment(AMINOACIDS, length, nbseqs)
	if err != nil {
		// Stop here: RandomAlignment returns a nil alignment on error, so
		// the checks below would panic instead of reporting the failure.
		t.Fatal(err)
	}

	if a.Length() != length {
		t.Error(fmt.Sprintf("Length should be %d and is %d", length, a.Length()))
	}
	if a.NbSequences() != nbseqs {
		t.Error(fmt.Sprintf("Nb sequences should be %d and is %d", nbseqs, a.NbSequences()))
	}
}

// TestAppendIdentifier checks that AppendSeqIdentifier adds the identifier
// to the left of every sequence name when right is false, and to the right
// of every sequence name when right is true.
func TestAppendIdentifier(t *testing.T) {
	a, err := RandomAlignment(AMINOACIDS, 300, 50)
	if err != nil {
		// Stop here: RandomAlignment returns a nil alignment on error, so
		// the calls below would panic instead of reporting the failure.
		t.Fatal(err)
	}

	a.AppendSeqIdentifier("IDENT", false)
	a.IterateChar(func(name string, sequence []rune) {
		if !strings.HasPrefix(name, "IDENT") {
			t.Error("Sequence name does not start with expected id: IDENT")
		}
	})

	a.AppendSeqIdentifier("IDENT", true)
	a.IterateChar(func(name string, sequence []rune) {
		if !strings.HasSuffix(name, "IDENT") {
			t.Error("Sequence name does not end with expected id: IDENT")
		}
	})
}
20 changes: 20 additions & 0 deletions align/sequence.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package align

import (
"errors"
"math/rand"
)

type Sequence interface {
Sequence() string
SequenceChar() []rune
Expand Down Expand Up @@ -35,3 +40,18 @@ func (s *seq) Name() string {
// Comment returns the comment stored with the sequence.
func (s *seq) Comment() string {
return s.comment
}

// RandomSequence generates a sequence of length characters, each drawn
// uniformly (via math/rand) from the letters of the given alphabet:
// stdaminoacid for AMINOACIDS, stdnucleotides for NUCLEOTIDS.
//
// It returns a nil slice and an error if the alphabet is neither AMINOACIDS
// nor NUCLEOTIDS, or if length is negative. The alphabet is validated even
// when length is 0, so an unsupported alphabet is always reported.
func RandomSequence(alphabet, length int) ([]rune, error) {
	// The alphabet does not change between positions: resolve it once.
	var letters []rune
	switch alphabet {
	case AMINOACIDS:
		letters = stdaminoacid
	case NUCLEOTIDS:
		letters = stdnucleotides
	default:
		return nil, errors.New("Unexpected sequence alphabet type")
	}

	// make panics on a negative length; report it as an error instead.
	if length < 0 {
		return nil, errors.New("Unexpected negative sequence length")
	}

	chars := make([]rune, length)
	for i := range chars {
		chars[i] = letters[rand.Intn(len(letters))]
	}
	return chars, nil
}
34 changes: 34 additions & 0 deletions cmd/addid.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package cmd

import (
"github.com/spf13/cobra"
)

// Values of the addid command flags, bound in init.
var addIdOutput string // --out-align / -o
var addIdName string // --name / -n: string added to sequence names
var addIdRight bool // --right / -r: add on the right instead of the left

// addidCmd represents the addid command: for each alignment obtained from
// rootaligns, it adds addIdName to every sequence name (on the right when
// addIdRight is set, on the left otherwise) and writes the result.
var addidCmd = &cobra.Command{
Use: "addid",
Short: "Adds a string to each sequence identifier of the input alignment",
Long: `Adds a string to each sequence identifier of the input alignment.
The string may be added to the left or to the right of each sequence identifier.
`,
Run: func(cmd *cobra.Command, args []string) {
// NOTE(review): openWriteFile is defined elsewhere; presumably it opens
// the destination named by the --out-align flag — confirm.
f := openWriteFile(addIdOutput)
// NOTE(review): rootaligns is defined elsewhere; presumably a channel of
// input alignments filled by the root command — confirm.
for al := range rootaligns {
al.AppendSeqIdentifier(addIdName, addIdRight)
writeAlign(al, f)
}
f.Close()
},
}

// init registers the addid command on RootCmd and declares its flags:
// --out-align/-o (default "stdout"), --name/-n (default "none") and
// --right/-r (default false).
func init() {
RootCmd.AddCommand(addidCmd)
addidCmd.PersistentFlags().StringVarP(&addIdOutput, "out-align", "o", "stdout", "Renamed alignment output file")
addidCmd.PersistentFlags().StringVarP(&addIdName, "name", "n", "none", "String to add to sequence names")
addidCmd.PersistentFlags().BoolVarP(&addIdRight, "right", "r", false, "Adds the String on the right of sequence names (otherwise, adds to left)")
}
53 changes: 53 additions & 0 deletions cmd/random.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package cmd

import (
"math/rand"
"time"

"github.com/fredericlemoine/goalign/align"
"github.com/fredericlemoine/goalign/io"
"github.com/spf13/cobra"
)

// Values of the random command flags, bound in init.
var randomLength, randomSize int // --length / -l and --nb-seqs / -n
var randomAA bool                // --amino-acids / -a
var randomOutput string          // --out-align / -o
var randomSeed int64             // --seed / -s

// randomCmd represents the random command: it seeds math/rand with
// randomSeed, generates an alignment of randomSize random sequences of
// length randomLength (amino acids when randomAA is set, nucleotides
// otherwise) and writes it to the output designated by randomOutput.
var randomCmd = &cobra.Command{
	Use:   "random",
	Short: "Generate random sequences",
	Long: `Generate random sequences.
`,
	// Intentionally empty.
	// NOTE(review): presumably this overrides a PersistentPreRun inherited
	// from RootCmd, since this command reads no input alignment — confirm.
	PersistentPreRun: func(cmd *cobra.Command, args []string) {
	},
	Run: func(cmd *cobra.Command, args []string) {
		// Use this command's own --seed value (not the seed of another
		// command) so that runs are reproducible as advertised.
		rand.Seed(randomSeed)

		alphabet := align.NUCLEOTIDS
		if randomAA {
			alphabet = align.AMINOACIDS
		}
		a, err := align.RandomAlignment(alphabet, randomLength, randomSize)
		if err != nil {
			io.ExitWithMessage(err)
		}

		// Write to this command's own --out-align destination (not the
		// output variable of the addid command).
		f := openWriteFile(randomOutput)
		writeAlign(a, f)
		f.Close()
	},
}

// init registers the random command on RootCmd and declares its flags:
// --length/-l (default 100), --nb-seqs/-n (default 10), --amino-acids/-a
// (default false), --out-align/-o (default "stdout") and --seed/-s.
// The default seed is the current time in nanoseconds, taken when init runs.
func init() {
RootCmd.AddCommand(randomCmd)
randomCmd.PersistentFlags().IntVarP(&randomLength, "length", "l", 100, "Length of sequences to generate")
randomCmd.PersistentFlags().IntVarP(&randomSize, "nb-seqs", "n", 10, "Number of sequences to generate")
randomCmd.PersistentFlags().BoolVarP(&randomAA, "amino-acids", "a", false, "Aminoacid sequences (otherwise, nucleotides)")
randomCmd.PersistentFlags().StringVarP(&randomOutput, "out-align", "o", "stdout", "Random alignment output file")
randomCmd.PersistentFlags().Int64VarP(&randomSeed, "seed", "s", time.Now().UTC().UnixNano(), "Initial Random Seed")
}

0 comments on commit 8747ac7

Please sign in to comment.