Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #225 from BinPro/develop
Version 1.0.0
- Loading branch information
Showing
49 changed files
with
2,584 additions
and
2,841 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Docker for CONCOCT (http://github.com/BinPro/CONCOCT) v1.0.0
# VERSION 1.0.0
#
# This Dockerfile builds an Ubuntu image with CONCOCT v1.0.0 and all of
# its dependencies installed.
#
# To start an interactive shell in the container with a directory shared
# from the host, run:
#
# docker run -v /my/host/shared/directory:/my/docker/location -i -t alneberg/concoct_1.0.0 /bin/bash
#

FROM ubuntu:18.04
COPY . /opt/CONCOCT

# Get basic ubuntu packages needed.
# 'update' and 'install' share one layer so that a cached, stale package
# index is never combined with a changed package list.
RUN apt-get update -qq && \
    apt-get install -qq wget build-essential libgsl0-dev git zip unzip bedtools python-pip

RUN pip install --upgrade pip

# Install the python dependencies of the CONCOCT checkout copied above.
# Commands are chained with '&&' (not ';') so a failing step fails the
# build, and the instruction no longer ends in a dangling line
# continuation that would swallow the next RUN instruction.
RUN cd /opt/CONCOCT && \
    pip install -r requirements.txt

# Kept for reference: installing from the released tarball instead of the
# local checkout.
# wget --no-check-certificate https://github.com/BinPro/CONCOCT/archive/1.0.0.tar.gz;\
# tar xf 1.0.0.tar.gz;\
# cd CONCOCT-1.0.0;\
# python setup.py install

# Install CONCOCT itself from the copied source tree.
RUN cd /opt/CONCOCT/ && \
    python setup.py install

# Run the test suite as part of the build so a broken image is never produced.
RUN cd /opt/CONCOCT/ && \
    nosetests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#!/usr/bin/env python | ||
from __future__ import division | ||
DESC="""A script that iterates over concoct results and reruns the concoct algorithm | ||
for clusters where the median SCG presence is at least 2.""" | ||
|
||
|
||
import sys | ||
import logging | ||
import vbgmm | ||
import numpy as np | ||
import argparse | ||
import pandas as p | ||
|
||
from sklearn.decomposition import PCA | ||
|
||
from concoct.transform import perform_pca | ||
|
||
def main(argv):
    """Re-run the clustering inside clusters that contain several genomes.

    Reads a cluster assignment file, the (already transformed) data the
    clustering was computed on, and a per-cluster single-copy-gene (SCG)
    frequency table.  For every cluster label ``k`` whose median SCG count
    is greater than one, the members of that cluster are projected with a
    PCA (``n_components=0.90``) and re-clustered with ``vbgmm.fit`` using
    ``median * expansion_factor`` initial clusters.  Members assigned to
    sub-cluster 0 keep label ``k``; every other sub-cluster receives a new
    label appended after the highest label currently in use.

    The refined assignment is written to ``clustering_refine.csv`` in the
    current working directory.

    Args:
        argv: command line arguments without the program name, e.g.
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description=DESC)
    parser.add_argument("cluster_file", help="string specifying cluster file")
    parser.add_argument("original_data", help="string original but transformed data file")
    parser.add_argument("scg_file", help="string specifying scg frequency file")
    parser.add_argument('-e', '--expansion_factor', default=2, type=int,
                        help=("number of clusters to expand by"))
    parser.add_argument('-t', '--threads', default=1, type=int,
                        help=("number of threads to use defaults to one"))

    # Parse the arguments handed to main() instead of silently falling
    # back to sys.argv, so the function honours its own parameter.
    args = parser.parse_args(argv)

    clusters = p.read_csv(args.cluster_file, header=None, index_col=0)
    original_data = p.read_csv(args.original_data, header=0, index_col=0)
    scg_freq = p.read_csv(args.scg_file, header=0, index_col=0)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in both old and new pandas versions.
    clusters_matrix = clusters.values
    original_data_matrix = original_data.values

    # One median per row of the SCG table.
    # NOTE(review): row k is assumed to describe cluster label k - confirm
    # against the script producing the SCG file.
    med_scgs = np.median(scg_freq.values, axis=1)

    labels = clusters_matrix[:, 0]
    cluster_freq = np.bincount(labels)
    K = cluster_freq.shape[0]

    new_clusters_matrix = np.copy(clusters_matrix, order='C')
    # Highest label currently in use; new sub-cluster labels go after it.
    nNewK = K - 1
    for k in range(K):
        if not med_scgs[k] > 1:
            continue
        if cluster_freq[k] == 0:
            # A label that never occurs has nothing to refine.
            continue

        # NOTE(review): the rows of the cluster file and of the data file
        # are assumed to be in the same order - the mask built from one is
        # applied positionally to the other.
        member_mask = labels == k
        index_k = np.where(member_mask)[0]
        slice_k = original_data_matrix[member_mask, :]

        pca_object = PCA(n_components=0.90).fit(slice_k)
        transform_k = pca_object.transform(slice_k)

        NK = int(med_scgs[k] * args.expansion_factor)
        # A single-argument print() behaves the same on Python 2 and 3.
        print("Run CONCOCT for " + str(k) + " with " + str(NK) +
              " clusters using " + str(args.threads) + " threads")
        assigns = vbgmm.fit(np.copy(transform_k, order='C'), NK, int(args.threads))
        kK = np.max(assigns) + 1

        # Sub-cluster 0 keeps the original label k; the others get fresh ones.
        for a in range(1, kK):
            index_a = index_k[assigns == a]
            new_clusters_matrix[index_a] = nNewK + a

        nNewK = nNewK + kK - 1

    new_assign_df = p.DataFrame(new_clusters_matrix, index=original_data.index)
    new_assign_df.to_csv("clustering_refine.csv")


if __name__ == "__main__":
    main(sys.argv[1:])
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Builds the stand-alone test executable for the C implementation of
# the VBGMM fit.
CC = gcc
CFLAGS = -std=c99 -g -I/usr/local/include/
EFLAGS =
EFILE = test_vbgmmfit
LIBS = -lgomp -lpthread -lm -lgsl -lgslcblas -L/usr/local/lib
OBJS = c_vbgmm_fit.o test_vbgmm_fit.o

# 'clean' is a command, not a file to be built.
.PHONY: clean

$(EFILE) : $(OBJS)
	@echo "linking..."
	$(CC) $(EFLAGS) -o $(EFILE) $(OBJS) $(LIBS)

# Every object depends on its own source file and on the shared header,
# so editing test_vbgmm_fit.c triggers a rebuild as well.
%.o : %.c c_vbgmm_fit.h
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -rf *.o $(EFILE)
Oops, something went wrong.