-
Notifications
You must be signed in to change notification settings - Fork 2
/
ccontigs.jl
55 lines (46 loc) · 1.42 KB
/
ccontigs.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#! /Applications/Julia-0.4.5.app/Contents/Resources/julia/bin/julia
# Geoffrey Hannigan
# University of Michigan
# Setting dependencies
using Bio.Align
using Bio.Seq
using ArgParse
# Setup ArgParse
argx = ArgParseSettings()
@add_arg_table argx begin
"--input", "-i"
help = "Input fasta file for identifying circular contigs."
"--output", "-o"
help = "Output file to write reults."
"--length", "-l"
help = "Length of sequence window for detecting repeats."
default = 100
"--thresh", "-t"
help = "Threshold (percent of length) for circular contig."
default = 0.97
end
parsed_args = parse_args(ARGS, argx)
problem = LocalAlignment()
scoremodel = AffineGapScoreModel(
match=5,
mismatch=-4,
gap_open=-4,
gap_extend=-1
)
filepath = parsed_args["input"]
fileout = parsed_args["output"]
outputfile = open(fileout, "w")
hitlength = parsed_args["length"]
threshold = hitlength * 5 * parsed_args["thresh"]
maxscore = hitlength * 5
print("Perfect alignment score is $maxscore\n")
inputfasta = FASTA.Reader(open( filepath))
for s in inputfasta
alignment_result = pairalign(problem, FASTA.sequence(s , 1:hitlength), FASTA.sequence(s , hitlength:length(FASTA.sequence(s))), scoremodel)
scorestring = score(alignment_result)
seqname = FASTA.identifier(s)
if scorestring > threshold
write(outputfile, "$seqname\t$scorestring\n")
end
end
print("Complete\n")