Simulation Updates for Revision

deto · Dec 17, 2020 · 24aa531 · 24aa531
1 parent fe9775c
commit 24aa531
Show file tree

Hide file tree

Showing 8 changed files with 885 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -36,3 +36,4 @@ SlideSeq/DropViz/DropSeq.util*
 *.mtx.gz
 *.xls.gz
 **/*_log*
+misc/*
diff --git a/Transcriptomics/Simulation6/Snakefile b/Transcriptomics/Simulation6/Snakefile
@@ -0,0 +1,317 @@
+subworkflow data:
+    workdir:
+        "../../data/Simulated6"
+
+data_file = "rep{N}/data.loom"
+gene_effect_file = "rep{N}/gene_effects.txt"
+
+rule select_threshold:
+    input:
+        loom=data(data_file),
+    params:
+        N_CELLS=10,
+    output:
+        genes="rep{N}/genes/threshold.txt"
+    script: "pipelineScripts/select_threshold.py"
+
+rule select_for_pairs:
+    input:
+        ge_file=data(gene_effect_file),
+    output:
+        genes="rep{N}/genes/forPairs.txt"
+    script: "pipelineScripts/select_for_pairs.py"
+
+rule select_hvg:
+    input:
+        loom=data(data_file),
+    params:
+        lowXcutoff=0.1,
+        #highXMeanCutoff=20,
+    output:
+        genes="rep{N}/genes/hvg.txt",
+        geneInfo="rep{N}/genes/hvg_info.txt"
+    script: "pipelineScripts/select_hvg.R"
+
+rule select_danb:
+    input:
+        loom=data(data_file),
+    params:
+        #highXMeanCutoff=20,
+    output:
+        genes="rep{N}/genes/danb.txt",
+        geneInfo="rep{N}/genes/danb_info.txt"
+    script: "pipelineScripts/select_danb.R"
+
+rule PCA_threshold:
+    message: "Computing PCA on thresholded genes"
+    input:
+        loom=data(data_file),
+        genes=rules.select_threshold.output.genes,
+    params:
+        doStandardize=True,
+        doJackstraw=False,
+        num_pcs=5,
+    output:
+        latent="rep{N}/pca/pca_threshold.txt",
+        components="rep{N}/pca/pca_components.txt",
+    script: "pipelineScripts/pca/pca_sim.py"
+
+rule select_pca:
+    input:
+        components=rules.PCA_threshold.output.components,
+    output:
+        genes="rep{N}/genes/pca.txt",
+        geneInfo="rep{N}/genes/pca_info.txt"
+    script: "pipelineScripts/select_pca.py"
+
+rule SCVI:
+    message: "Computing SCVI on thresholded genes"
+    input:
+        loom=data(data_file),
+        genes=rules.select_threshold.output.genes,
+    params:
+        components=10,
+    output:
+        latent="rep{N}/scvi/threshold/latent.txt.gz",
+        model="rep{N}/scvi/threshold/model.pkl",
+    script: "pipelineScripts/scvi/scviTorch.py"
+
+rule TSNE:
+    message: "Computing TSNE - hvg genes"
+    input:
+        latent=rules.SCVI.output.latent,
+    output:
+        out="rep{N}/tsne/tsne.txt",
+    script: "pipelineScripts/tsne/tsne.py"
+
+rule UMAP:
+    message: "Computing UMAP"
+    input:
+        latent=rules.SCVI.output.latent,
+    params:
+        n_neighbors=30,
+    output:
+        out="rep{N}/umap/umap.txt",
+    script: "pipelineScripts/umap/umap.py"
+
+rule TSNE_pca:
+    message: "Computing TSNE - hvg genes"
+    input:
+        latent=rules.PCA_threshold.output.latent,
+    output:
+        out="rep{N}/tsne/tsne_pca.txt",
+    script: "pipelineScripts/tsne/tsne.py"
+
+rule UMAP_pca:
+    message: "Computing UMAP"
+    input:
+        latent=rules.PCA_threshold.output.latent,
+    params:
+        n_neighbors=30,
+    output:
+        out="rep{N}/umap/umap_pca.txt",
+    script: "pipelineScripts/umap/umap.py"
+
+rule runHotspot_threshold:
+    input:
+        loom=data(data_file),
+        latent=rules.SCVI.output.latent,
+    params:
+        model='danb',
+        n_neighbors=30,
+        n_cells_min=10,
+    output:
+        results="rep{N}/hotspot/hotspot_threshold.txt"
+    script: "pipelineScripts/hotspot/runHotspot.py"
+
+rule runHotspot_threshold_300:
+    input:
+        loom=data(data_file),
+        latent=rules.SCVI.output.latent,
+    params:
+        model='danb',
+        n_neighbors=300,
+        n_cells_min=10,
+        weighted_graph=True,
+    output:
+        results="rep{N}/hotspot/hotspot_threshold_300.txt"
+    script: "pipelineScripts/hotspot/runHotspot.py"
+
+rule runHotspot_threshold_pca:
+    input:
+        loom=data(data_file),
+        latent=rules.PCA_threshold.output.latent,
+    params:
+        model='danb',
+        n_neighbors=300,
+        n_cells_min=10,
+        weighted_graph=True,
+    output:
+        results="rep{N}/hotspot/hotspot_pca_threshold.txt"
+    script: "pipelineScripts/hotspot/runHotspot.py"
+
+rule runHotspotPairs:
+    input:
+        loom=data(data_file),
+        latent=rules.SCVI.output.latent,
+        hs_results=rules.runHotspot_threshold.output.results,
+        genes=rules.select_for_pairs.output.genes,
+    params:
+        model='danb',
+        fdrThresh=0.05,
+        # topN=500,
+        n_neighbors=30,
+        # highXMeanCutoff=20,
+    output:
+        results_z="rep{N}/hotspot/hotspot_pairs_z.txt.gz"
+    script: "pipelineScripts/hotspot/runHotspotPairs.py"
+
+rule runRegularPairs:
+    input:
+        loom=data(data_file),
+        latent=rules.SCVI.output.latent,
+        hs_results=rules.runHotspot_threshold.output.results,
+        genes=rules.select_for_pairs.output.genes,
+    params:
+        model='danb',
+        fdrThresh=0.05,
+        # topN=500,
+        n_neighbors=30,
+        # highXMeanCutoff=20,
+    output:
+        results_lc="rep{N}/hotspot/regular_pairs_lc.txt.gz",
+    script: "pipelineScripts/hotspot/runRegularPairs.py"
+
+rule extractHotspotModules:
+    input:
+        results_z=rules.runHotspotPairs.output.results_z,
+    params:
+        min_cluster_genes=15,
+        core_only=False,
+        # z_threshold=1.96,
+        fdr_threshold=.05,
+    output:
+        cluster_heatmap="rep{N}/hotspot/module_heatmap.svg",
+        cluster_output="rep{N}/hotspot/modules.txt",
+        linkage_output="rep{N}/hotspot/linkage.txt",
+    script: "pipelineScripts/hotspot/extractHotspotModules.py"
+
+
+rule extractRegularModules:
+    input:
+        results_z=rules.runRegularPairs.output.results_lc,
+        latent=rules.SCVI.output.latent,
+    params:
+        min_cluster_genes=15,
+        core_only=False,
+        fdr_threshold=.05,
+    output:
+        cluster_heatmap="rep{N}/hotspot/module_heatmap_regular.svg",
+        cluster_output="rep{N}/hotspot/modules_regular.txt",
+        linkage_output="rep{N}/hotspot/linkage_regular.txt",
+    script: "pipelineScripts/hotspot/extractRegularModules.py"
+
+
+rule vision:
+    input:
+        loom=data(data_file),
+        latent=rules.SCVI.output.latent,
+        proj_positions=rules.SCVI.output.latent,
+        tsne=rules.TSNE.output.out,
+        umap=rules.UMAP.output.out,
+    output:
+        out="rep{N}/vision/vision.rds"
+    script: "pipelineScripts/Vision/vision.R"
+
+rule vision_pca:
+    input:
+        loom=data(data_file),
+        latent=rules.PCA_threshold.output.latent,
+        proj_positions=rules.PCA_threshold.output.latent,
+        tsne=rules.TSNE_pca.output.out,
+        umap=rules.UMAP_pca.output.out,
+    output:
+        out="rep{N}/vision/vision_pca.rds"
+    script: "pipelineScripts/Vision/vision.R"
+
+rule run_WGCNA:
+    input:
+        loom=data(data_file),
+        genes=rules.select_for_pairs.output.genes,
+    params:
+        power=1,
+        min_module_size=15,
+    output:
+        cluster_output="rep{N}/wgcna/modules.txt",
+        scores="rep{N}/wgcna/module_scores.txt",
+    script: "../../pipelineScripts/wgcna/wgcna.R"
+
+rule run_ICA:
+    input:
+        loom=data(data_file),
+        genes=rules.select_for_pairs.output.genes,
+    params:
+        n_components=5,
+    output:
+        components="rep{N}/ica/components.txt",
+    script: "../../pipelineScripts/ICA/run_ica.py"
+
+rule run_ICA_fdr:
+    input:
+        components="rep{N}/ica/components.txt",
+    params:
+        qvalcutoff=.1
+    output:
+        components_fdr="rep{N}/ica/components_fdr.txt",
+        modules="rep{N}/ica/modules.txt",
+    script: "../../pipelineScripts/ICA/run_ica_fdr.R"
+
+rule runGrnboost:
+    input:
+        loom=data(data_file),
+        genes=rules.select_for_pairs.output.genes,
+    output:
+        pair_scores="rep{N}/grnboost/importance_scores.txt.gz",
+    shell: 
+        """
+        python pipelineScripts/grnboost2/grnboost2.py \
+            `readlink -f {input.loom}` \
+            `readlink -f {input.genes}` \
+            `readlink -f {output.pair_scores}`
+        """
+
+rule extractModules_Grnboost:
+    input:
+        results_z=rules.runGrnboost.output.pair_scores,
+    params:
+        min_cluster_genes=15,
+        fdr_threshold=.05,
+    output:
+        cluster_heatmap="rep{N}/grnboost/module_heatmap.svg",
+        cluster_output="rep{N}/grnboost/modules.txt",
+        linkage_output="rep{N}/grnboost/linkage.txt",
+    script: "pipelineScripts/grnboost2/extractModules.py"
+
+reps = [str(x) for x in range(1, 11)]
+
+rule all_reps:
+    input:
+        # expand(rules.UMAP.output.out, N=reps),
+        # expand(rules.TSNE.output.out, N=reps),
+        # expand(rules.vision.output.out, N=reps),
+        expand(rules.runHotspot_threshold_300.output, N=reps),
+        expand(rules.runHotspot_threshold_pca.output, N=reps),
+        expand(rules.runHotspotPairs.output, N=reps),
+        expand(rules.runRegularPairs.output, N=reps),
+        expand(rules.extractHotspotModules.output, N=reps),
+        expand(rules.extractRegularModules.output, N=reps),
+        expand(rules.select_hvg.output, N=reps),
+        expand(rules.select_danb.output, N=reps),
+        expand(rules.select_pca.output, N=reps),
+        # expand(rules.TSNE_pca.output, N=reps),
+        expand(rules.run_WGCNA.output, N=reps),
+        expand(rules.run_ICA_fdr.output, N=reps),
+        expand(rules.extractModules_Grnboost.output, N=reps),
+
+
+include: "Snakefile_k_sensitivity"
diff --git a/Transcriptomics/Simulation6/Snakefile_k_sensitivity b/Transcriptomics/Simulation6/Snakefile_k_sensitivity
@@ -0,0 +1,18 @@
+rule runHotspot_k_sensitivity:
+    input:
+        loom=data(data_file),
+        latent=rules.SCVI.output.latent,
+    params:
+        model='danb',
+        n_neighbors='{k}',
+        n_cells_min=10,
+        weighted_graph=False,
+    output:
+        results="rep{N}/knn_sensitivity/k_{k}/hotspot/hotspot.txt"
+    script: "../../pipelineScripts/hotspot/runHotspot.py"
+
+k_values = [5, 10, 30, 50, 100, 300, 500, 1000]
+
+rule runHotspot_k_sensitivity_all:
+    input:
+        expand(rules.runHotspot_k_sensitivity.output.results, k=k_values, N=1)
diff --git a/Transcriptomics/Simulation6/pipelineScripts b/Transcriptomics/Simulation6/pipelineScripts
@@ -0,0 +1 @@
+../../pipelineScripts/