-
Notifications
You must be signed in to change notification settings - Fork 48
/
concoct
executable file
·90 lines (65 loc) · 2.3 KB
/
concoct
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
from __future__ import print_function
import sys
import logging
import vbgmm
import numpy as np
from concoct.output import Output
from concoct.parser import arguments
from concoct.input import load_data
from concoct.transform import perform_pca
def main(args):
# Initialize output handling
Output(args.basename,args)
composition, cov, cov_range = load_data(args)
# If there are zero or one contig that exceed the filter, do not continue
if len(composition) < 2:
logging.error('Not enough contigs pass the threshold filter. Exiting!')
sys.exit(-1)
if cov is not None:
joined = composition.join(cov.ix[:,cov_range[0]:cov_range[1]],how="inner")
else:
joined = composition
# Fix special case in pca_components
if args.pca_components == "All":
args.pca_components = joined.shape[1]
#PCA on the contigs that have kmer count greater than length_threshold
transform_filter, pca = perform_pca(
joined,
args.pca_components,
args.seed
)
logging.info('Performed PCA, resulted in %s dimensions' % transform_filter.shape[1])
if not args.no_original_data:
Output.write_original_data(
joined,
args.length_threshold
)
Output.write_pca(
transform_filter,
args.length_threshold,
joined.index,
)
Output.write_pca_components(
pca.components_,
args.length_threshold
)
logging.info('PCA transformed data.')
logging.info('Will call vbgmm with parameters: %s, %s, %s, %s' % (Output.CONCOCT_PATH, args.clusters, args.length_threshold, args.threads))
N_contigs = transform_filter.shape[0]
assign = np.zeros(N_contigs, dtype=np.int32)
assign = vbgmm.fit(np.copy(transform_filter,order='C'), int(args.clusters), int(args.seed), int(args.threads))
Output.write_assign(
assign,
args.length_threshold,
joined.index,
)
logging.info("CONCOCT Finished")
if __name__=="__main__":
args = arguments()
if args.total_percentage_pca == 100:
args.pca_components = "All"
else:
args.pca_components = args.total_percentage_pca/100.0
results = main(args)
print("CONCOCT Finished, the log shows how it went.", file=sys.stderr)