Skip to content

Commit

Permalink
Simulation Updates for Revision
Browse files Browse the repository at this point in the history
  • Loading branch information
deto committed Dec 17, 2020
1 parent fe9775c commit 24aa531
Show file tree
Hide file tree
Showing 8 changed files with 885 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -36,3 +36,4 @@ SlideSeq/DropViz/DropSeq.util*
*.mtx.gz
*.xls.gz
**/*_log*
misc/*
317 changes: 317 additions & 0 deletions Transcriptomics/Simulation6/Snakefile
@@ -0,0 +1,317 @@
subworkflow data:
workdir:
"../../data/Simulated6"

data_file = "rep{N}/data.loom"
gene_effect_file = "rep{N}/gene_effects.txt"

rule select_threshold:
input:
loom=data(data_file),
params:
N_CELLS=10,
output:
genes="rep{N}/genes/threshold.txt"
script: "pipelineScripts/select_threshold.py"

rule select_for_pairs:
input:
ge_file=data(gene_effect_file),
output:
genes="rep{N}/genes/forPairs.txt"
script: "pipelineScripts/select_for_pairs.py"

rule select_hvg:
input:
loom=data(data_file),
params:
lowXcutoff=0.1,
#highXMeanCutoff=20,
output:
genes="rep{N}/genes/hvg.txt",
geneInfo="rep{N}/genes/hvg_info.txt"
script: "pipelineScripts/select_hvg.R"

rule select_danb:
input:
loom=data(data_file),
params:
#highXMeanCutoff=20,
output:
genes="rep{N}/genes/danb.txt",
geneInfo="rep{N}/genes/danb_info.txt"
script: "pipelineScripts/select_danb.R"

rule PCA_threshold:
message: "Computing PCA on thresholded genes"
input:
loom=data(data_file),
genes=rules.select_threshold.output.genes,
params:
doStandardize=True,
doJackstraw=False,
num_pcs=5,
output:
latent="rep{N}/pca/pca_threshold.txt",
components="rep{N}/pca/pca_components.txt",
script: "pipelineScripts/pca/pca_sim.py"

rule select_pca:
input:
components=rules.PCA_threshold.output.components,
output:
genes="rep{N}/genes/pca.txt",
geneInfo="rep{N}/genes/pca_info.txt"
script: "pipelineScripts/select_pca.py"

rule SCVI:
message: "Computing SCVI on thresholded genes"
input:
loom=data(data_file),
genes=rules.select_threshold.output.genes,
params:
components=10,
output:
latent="rep{N}/scvi/threshold/latent.txt.gz",
model="rep{N}/scvi/threshold/model.pkl",
script: "pipelineScripts/scvi/scviTorch.py"

rule TSNE:
message: "Computing TSNE - hvg genes"
input:
latent=rules.SCVI.output.latent,
output:
out="rep{N}/tsne/tsne.txt",
script: "pipelineScripts/tsne/tsne.py"

rule UMAP:
message: "Computing UMAP"
input:
latent=rules.SCVI.output.latent,
params:
n_neighbors=30,
output:
out="rep{N}/umap/umap.txt",
script: "pipelineScripts/umap/umap.py"

rule TSNE_pca:
message: "Computing TSNE - hvg genes"
input:
latent=rules.PCA_threshold.output.latent,
output:
out="rep{N}/tsne/tsne_pca.txt",
script: "pipelineScripts/tsne/tsne.py"

rule UMAP_pca:
message: "Computing UMAP"
input:
latent=rules.PCA_threshold.output.latent,
params:
n_neighbors=30,
output:
out="rep{N}/umap/umap_pca.txt",
script: "pipelineScripts/umap/umap.py"

rule runHotspot_threshold:
input:
loom=data(data_file),
latent=rules.SCVI.output.latent,
params:
model='danb',
n_neighbors=30,
n_cells_min=10,
output:
results="rep{N}/hotspot/hotspot_threshold.txt"
script: "pipelineScripts/hotspot/runHotspot.py"

rule runHotspot_threshold_300:
input:
loom=data(data_file),
latent=rules.SCVI.output.latent,
params:
model='danb',
n_neighbors=300,
n_cells_min=10,
weighted_graph=True,
output:
results="rep{N}/hotspot/hotspot_threshold_300.txt"
script: "pipelineScripts/hotspot/runHotspot.py"

rule runHotspot_threshold_pca:
input:
loom=data(data_file),
latent=rules.PCA_threshold.output.latent,
params:
model='danb',
n_neighbors=300,
n_cells_min=10,
weighted_graph=True,
output:
results="rep{N}/hotspot/hotspot_pca_threshold.txt"
script: "pipelineScripts/hotspot/runHotspot.py"

rule runHotspotPairs:
input:
loom=data(data_file),
latent=rules.SCVI.output.latent,
hs_results=rules.runHotspot_threshold.output.results,
genes=rules.select_for_pairs.output.genes,
params:
model='danb',
fdrThresh=0.05,
# topN=500,
n_neighbors=30,
# highXMeanCutoff=20,
output:
results_z="rep{N}/hotspot/hotspot_pairs_z.txt.gz"
script: "pipelineScripts/hotspot/runHotspotPairs.py"

rule runRegularPairs:
input:
loom=data(data_file),
latent=rules.SCVI.output.latent,
hs_results=rules.runHotspot_threshold.output.results,
genes=rules.select_for_pairs.output.genes,
params:
model='danb',
fdrThresh=0.05,
# topN=500,
n_neighbors=30,
# highXMeanCutoff=20,
output:
results_lc="rep{N}/hotspot/regular_pairs_lc.txt.gz",
script: "pipelineScripts/hotspot/runRegularPairs.py"

rule extractHotspotModules:
input:
results_z=rules.runHotspotPairs.output.results_z,
params:
min_cluster_genes=15,
core_only=False,
# z_threshold=1.96,
fdr_threshold=.05,
output:
cluster_heatmap="rep{N}/hotspot/module_heatmap.svg",
cluster_output="rep{N}/hotspot/modules.txt",
linkage_output="rep{N}/hotspot/linkage.txt",
script: "pipelineScripts/hotspot/extractHotspotModules.py"


rule extractRegularModules:
input:
results_z=rules.runRegularPairs.output.results_lc,
latent=rules.SCVI.output.latent,
params:
min_cluster_genes=15,
core_only=False,
fdr_threshold=.05,
output:
cluster_heatmap="rep{N}/hotspot/module_heatmap_regular.svg",
cluster_output="rep{N}/hotspot/modules_regular.txt",
linkage_output="rep{N}/hotspot/linkage_regular.txt",
script: "pipelineScripts/hotspot/extractRegularModules.py"


rule vision:
input:
loom=data(data_file),
latent=rules.SCVI.output.latent,
proj_positions=rules.SCVI.output.latent,
tsne=rules.TSNE.output.out,
umap=rules.UMAP.output.out,
output:
out="rep{N}/vision/vision.rds"
script: "pipelineScripts/Vision/vision.R"

rule vision_pca:
input:
loom=data(data_file),
latent=rules.PCA_threshold.output.latent,
proj_positions=rules.PCA_threshold.output.latent,
tsne=rules.TSNE_pca.output.out,
umap=rules.UMAP_pca.output.out,
output:
out="rep{N}/vision/vision_pca.rds"
script: "pipelineScripts/Vision/vision.R"

rule run_WGCNA:
input:
loom=data(data_file),
genes=rules.select_for_pairs.output.genes,
params:
power=1,
min_module_size=15,
output:
cluster_output="rep{N}/wgcna/modules.txt",
scores="rep{N}/wgcna/module_scores.txt",
script: "../../pipelineScripts/wgcna/wgcna.R"

rule run_ICA:
input:
loom=data(data_file),
genes=rules.select_for_pairs.output.genes,
params:
n_components=5,
output:
components="rep{N}/ica/components.txt",
script: "../../pipelineScripts/ICA/run_ica.py"

rule run_ICA_fdr:
input:
components="rep{N}/ica/components.txt",
params:
qvalcutoff=.1
output:
components_fdr="rep{N}/ica/components_fdr.txt",
modules="rep{N}/ica/modules.txt",
script: "../../pipelineScripts/ICA/run_ica_fdr.R"

rule runGrnboost:
input:
loom=data(data_file),
genes=rules.select_for_pairs.output.genes,
output:
pair_scores="rep{N}/grnboost/importance_scores.txt.gz",
shell:
"""
python pipelineScripts/grnboost2/grnboost2.py \
`readlink -f {input.loom}` \
`readlink -f {input.genes}` \
`readlink -f {output.pair_scores}`
"""

rule extractModules_Grnboost:
input:
results_z=rules.runGrnboost.output.pair_scores,
params:
min_cluster_genes=15,
fdr_threshold=.05,
output:
cluster_heatmap="rep{N}/grnboost/module_heatmap.svg",
cluster_output="rep{N}/grnboost/modules.txt",
linkage_output="rep{N}/grnboost/linkage.txt",
script: "pipelineScripts/grnboost2/extractModules.py"

reps = [str(x) for x in range(1, 11)]

rule all_reps:
input:
# expand(rules.UMAP.output.out, N=reps),
# expand(rules.TSNE.output.out, N=reps),
# expand(rules.vision.output.out, N=reps),
expand(rules.runHotspot_threshold_300.output, N=reps),
expand(rules.runHotspot_threshold_pca.output, N=reps),
expand(rules.runHotspotPairs.output, N=reps),
expand(rules.runRegularPairs.output, N=reps),
expand(rules.extractHotspotModules.output, N=reps),
expand(rules.extractRegularModules.output, N=reps),
expand(rules.select_hvg.output, N=reps),
expand(rules.select_danb.output, N=reps),
expand(rules.select_pca.output, N=reps),
# expand(rules.TSNE_pca.output, N=reps),
expand(rules.run_WGCNA.output, N=reps),
expand(rules.run_ICA_fdr.output, N=reps),
expand(rules.extractModules_Grnboost.output, N=reps),


include: "Snakefile_k_sensitivity"
18 changes: 18 additions & 0 deletions Transcriptomics/Simulation6/Snakefile_k_sensitivity
@@ -0,0 +1,18 @@
rule runHotspot_k_sensitivity:
input:
loom=data(data_file),
latent=rules.SCVI.output.latent,
params:
model='danb',
n_neighbors='{k}',
n_cells_min=10,
weighted_graph=False,
output:
results="rep{N}/knn_sensitivity/k_{k}/hotspot/hotspot.txt"
script: "../../pipelineScripts/hotspot/runHotspot.py"

k_values = [5, 10, 30, 50, 100, 300, 500, 1000]

rule runHotspot_k_sensitivity_all:
input:
expand(rules.runHotspot_k_sensitivity.output.results, k=k_values, N=1)
1 change: 1 addition & 0 deletions Transcriptomics/Simulation6/pipelineScripts

0 comments on commit 24aa531

Please sign in to comment.