From 9ea5545fd7f644a5f2a107cf925cd8aaa26a31af Mon Sep 17 00:00:00 2001 From: David DeTomaso Date: Wed, 16 Dec 2020 23:17:17 -0800 Subject: [PATCH] Update paths for Figure panels --- README.md | 69 +++++-- .../CD4_Correlation/ModuleConsistency_CD4.py | 185 ++++++++++++++++++ ...ency.py => ModuleConsistency_Monocytes.py} | 6 +- .../EvaluateFeatureSelection/hvg_vs_hs.py | 24 --- 4 files changed, 241 insertions(+), 43 deletions(-) create mode 100644 Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_CD4.py rename Transcriptomics/Figures/CD4_Correlation/{ModuleConsistency.py => ModuleConsistency_Monocytes.py} (96%) diff --git a/README.md b/README.md index 470dbc6..468df3b 100644 --- a/README.md +++ b/README.md @@ -23,43 +23,84 @@ Outputs can be recreated in each directory by running `snakemake all` Code to re-create figures can be found in various `Figures` directories and depends on prior execution of Snakemake pipelines. -- Figure 1 +- Figure 1 - Algorithm Diagram + +- Figure 2 + - Panel A: `/Transcriptomics/Figures/EvaluateFeatureSelection/plotRelevance.py` + - Panel B: `/Transcriptomics/Figures/EvaluateFeatureSelection/compareLatentSpaces.py` + - Panel C: `/Transcriptomics/Figures/EvaluateFeatureSelection/compare_local_expression.py` + - Panel D: `/Transcriptomics/Figures/CD4_Correlation/plot.py` + - Panel E: `/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_CD4.py` + +- Figure 3 - Panel A: `/SlideSeq/Figures/MainFigure/moduleHeatmap.py` - Panel B: `/SlideSeq/Figures/MainFigure/moduleCellTypes.py` - Panel C: `/SlideSeq/Figures/MainFigure/moduleScores.py` -- Figure 2 + +- Figure 4 - Panel A: `/Lineage/Figure/plotCorrelations.py` - Panel B: `/Lineage/Figure/plotUMAPs.py` - Panel C: `/Lineage/Figure/plotKernels.py` - Panel D: `/Lineage/Figure/plotKernelsTx.py` - Panel E: `/Lineage/Figure/plotAngioblasts.py` -- Figure 3 - - Panel A: `/Transcriptomics/Figures/EvaluateFeatureSelection/plotRelevance.py` - - Panel B: 
`/Transcriptomics/Figures/EvaluateFeatureSelection/compareLatentSpaces.py` - - Panel C: `/Transcriptomics/Figures/CD4_Correlation/plot.py` - - Panel D: `/Transcriptomics/Figures/CD4_Correlation/plot.py` + - Figure S1 - - Panel A: `/SlideSeq/Figures/Supp1/plotMeanVar.py` + - Panel A: `/Transcriptomics/Figures/Simulation/plotTSNEs.py` + - Panel B: `/Transcriptomics/Figures/Simulation/plotAUC_PR.py` + - Panel C: `/Transcriptomics/Figures/Simulation/plotModuleAssignment.py` + +- Figure S2 + - Panel A: `/SlideSeq/Figures/Supp1/plotMeanVar.py`, `/SlideSeq/Figures/Supp_Autocorr/compare_local_expression.py` - Panel B: `/SlideSeq/Figures/Supp1/plotPR.py` - Panel C: `/SlideSeq/Figures/Supp1/plotTimings.py` - Panel D: `/SlideSeq/Figures/Supp1/plotIDR.py` -- Figure S2 + +- Figure S3 - Panel A: `/SlideSeq/Figures/Supp2/comparePairwiseZScores.py` - Panel B: `/SlideSeq/Figures/Supp2/compareModules.py` - Panel C: `/SlideSeq/Figures/Supp2/compareModuleAssignments.py` -- Figure S3 + +- Figure S4 - Panel A: `/SlideSeq/Figures/Supp3/compareModulesSpatialDE.py` - Panel B: `/SlideSeq/Figures/Supp3/compareTiming.py` - Panel C: `/SlideSeq/Figures/Supp3/compareModulesSpatialDE.py` -- Figure S4 - - Panel A: `/Transcriptomics/Figures/Simulation/plotTSNEs.py` - - Panel B: `/Transcriptomics/Figures/Simulation/plotAUC_PR.py` - - Panel C: `/Transcriptomics/Figures/Simulation/plotModuleAssignment.py` + - Figure S5 - Panel A: `/SlideSeq/Figures/Supp4/plotPValues.py` - Panel B: `/Transcriptomics/Figures/Supp_Pvals/plotPValues.py` - Panel C: `/Lineage/Figure/plotPValues.py` +- Figure S6 + - All Panels: `/Transcriptomics/Figures/EvaluateFeatureSelection/hvg_vs_hs.py` + +- Figure S7 + - Panel A: `/Transcriptomics/Figures/Simulation/downsampling_correlation.py` + - Panel B: `/Transcriptomics/Figures/CD4_Correlation/plot_downsampled.py` + - Panel C: `/Transcriptomics/Figures/CD4_Correlation/plot_downsampled.py` + - Panel D: `/Transcriptomics/Figures/Simulation/downsampling_correlation.py` + +- Figure S8 
+ - All Panels: `/SlideSeq/Figures/Supp5_HVG_vs_HS/hvg_vs_hs.py` + +- Figure S9 + - Panel A: `/SlideSeq/Figures/Supp6_NegBinom_vs_Bernoulli/plotPR.py` + - Panel B: `/SlideSeq/Figures/Supp6_NegBinom_vs_Bernoulli/compareModuleAssignments.py` + +- Figure S10 + - Column 1: `/Transcriptomics/Figures/Simulation/plotAUC_PR_k_sensitivity.py` + - Column 2: `/Transcriptomics/Figures/EvaluateFeatureSelection/plotRelevance_k_sensitivity.py` + - Column 3: `/SlideSeq/Figures/Supp1/plotPR_k_sensitivity.py` + +- Figure S11 + - Panel A: `/SlideSeq/Figures/Supp7_Bernoulli/plot.py` + - Panel B: `/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_Monocytes.py` + +- Figure S12 + - All Panels: `/Transcriptomics/Figures/EvaluateFeatureSelection/compare_local_expression.py` + +- Figure S13 + - All Panels: `/SlideSeq/Figures/Supp_Autocorr/compare_local_expression.py` + ## Software Versions diff --git a/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_CD4.py b/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_CD4.py new file mode 100644 index 0000000..7c869da --- /dev/null +++ b/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_CD4.py @@ -0,0 +1,185 @@ +""" +Plots module consistency for the CD4 data between the train/test splits +See ModuleConsistency_Monocytes.py for a nearly identical script on the Monocyte data +""" +import os +import pandas as pd +from tqdm import tqdm +import matplotlib.pyplot as plt +import seaborn as sns + +plt.rcParams['svg.fonttype'] = 'none' + +base_dir = '../../CD4_w_protein' + +datasets = [ + { + 'Name': 'Hotspot', + 'Train': os.path.join(base_dir, 'train/hotspot_hs/modules.txt'), + 'Test': os.path.join(base_dir, 'test/hotspot_hs/modules.txt'), + }, + { + 'Name': 'WGCNA', + 'Train': os.path.join(base_dir, 'train/wgcna_hs/modules.txt'), + 'Test': os.path.join(base_dir, 'test/wgcna_hs/modules.txt'), + }, + { + 'Name': 'ICA5', + 'Train': os.path.join(base_dir, 'train/ica5/modules.txt'), + 'Test': os.path.join(base_dir, 
'test/ica5/modules.txt'), + }, + # # Don't need so many ICA versions as this one isn't that different from ICA10 + # { + # 'Name': 'ICA8', + # 'Train': os.path.join(base_dir, 'train/ica8/modules.txt'), + # 'Test': os.path.join(base_dir, 'test/ica8/modules.txt'), + # }, + { + 'Name': 'ICA10', + 'Train': os.path.join(base_dir, 'train/ica10/modules.txt'), + 'Test': os.path.join(base_dir, 'test/ica10/modules.txt'), + }, + { + 'Name': 'Grnboost', + 'Train': os.path.join(base_dir, 'train/grnboost/modules.txt'), + 'Test': os.path.join(base_dir, 'test/grnboost/modules.txt'), + }, +] + +for data in datasets: + train = pd.read_table(data['Train'], index_col=0) + test = pd.read_table(data['Test'], index_col=0) + + train = train.Cluster.to_dict() + test = test.Cluster.to_dict() + data['TrainDict'] = train + data['TestDict'] = test + + +def eval_module_consistency(data): + consistency = ( + eval_module_consistency_inner(data['TrainDict'], data['TestDict']) + + eval_module_consistency_inner(data['TestDict'], data['TrainDict']) + ) / 2 + + data['Consistency'] = consistency + + +def eval_module_consistency_inner(dict_a, dict_b): + + all_genes = set(dict_a.keys()) & set(dict_b.keys()) + + # Of the gene pairs that share a module in 'A', what fraction also share a module in 'B'? 
+ + denom = 0 + num = 0 + for ga in all_genes: + for gb in all_genes: + + if ga == gb: continue + + if dict_a[ga] == dict_a[gb] and dict_a[ga] != -1 and dict_a[gb] != -1: # Same module in A + denom += 1 + + if dict_b[ga] == dict_b[gb] and dict_b[ga] != -1 and dict_b[gb] != -1: # Same module in B + num += 1 + + num = num/2 + denom = denom/2 + + consistent_rate = num/denom + + return consistent_rate + + +def eval_num_modules(data): + num_modules = ( + pd.Series(data['TrainDict']).unique().size - 1 + + pd.Series(data['TestDict']).unique().size - 1 + ) / 2 + + data['NumModules'] = num_modules + + +def eval_num_assigned(data): + assigned = ( + (pd.Series(data['TrainDict']) != -1).sum()/2 + + (pd.Series(data['TestDict']) != -1).sum()/2 + ) + + data['NumAssigned'] = assigned + + +for data in tqdm(datasets): + train = pd.read_table(data['Train'], index_col=0) + test = pd.read_table(data['Test'], index_col=0) + + train = train.Cluster.to_dict() + test = test.Cluster.to_dict() + data['TrainDict'] = train + data['TestDict'] = test + + eval_module_consistency(data) + eval_num_modules(data) + eval_num_assigned(data) + + +# %% Consolidate into a nice dataframe +columns = [ + 'Name', + 'Consistency', + 'NumModules', + 'NumAssigned' +] + +results = [] +for data in datasets: + results.append( + [data[x] for x in columns] + ) + +results = pd.DataFrame(results, columns=columns) + + +# %% Plot it +order = ['ICA5', 'ICA10', 'Grnboost', 'WGCNA', 'Hotspot'] +colors = sns.color_palette('deep')[:len(order)] +plot_data = results.set_index('Name').loc[order] + +fig, axs = plt.subplots(1, 3, figsize=(12, 4)) + +plt.sca(axs[0]) + +plt.bar( + plot_data.index, plot_data.Consistency, alpha=0.9, color=colors +) +plt.xticks(rotation=45) +plt.ylabel('Proportion of Gene Pairs Which\nReplicate Across Data Split') +plt.title('Reproducibility') +plt.gca().set_axisbelow(True) +plt.grid(color='#CCCCCC', lw=0.5, axis='y', ls=(0, (5, 5))) + +plt.sca(axs[1]) + +plt.bar( + plot_data.index, 
plot_data.NumModules, alpha=0.9, color=colors +) +plt.xticks(rotation=45) +plt.ylabel('Modules') +plt.title('# Modules') +plt.gca().set_axisbelow(True) +plt.grid(color='#CCCCCC', lw=0.5, axis='y', ls=(0, (5, 5))) + +plt.sca(axs[2]) + +plt.bar( + plot_data.index, plot_data.NumAssigned, alpha=0.9, color=colors +) +plt.xticks(rotation=45) +plt.ylabel('Genes') +plt.title('# Genes Assigned') +plt.gca().set_axisbelow(True) +plt.grid(color='#CCCCCC', lw=0.5, axis='y', ls=(0, (5, 5))) + +plt.subplots_adjust(bottom=.25, wspace=0.4, left=0.1, right=0.9) +plt.savefig('CD4_Module_TrainTest.svg') diff --git a/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency.py b/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_Monocytes.py similarity index 96% rename from Transcriptomics/Figures/CD4_Correlation/ModuleConsistency.py rename to Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_Monocytes.py index 2b32969..f6a90fb 100644 --- a/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency.py +++ b/Transcriptomics/Figures/CD4_Correlation/ModuleConsistency_Monocytes.py @@ -3,11 +3,9 @@ from tqdm import tqdm import matplotlib.pyplot as plt import seaborn as sns - + plt.rcParams['svg.fonttype'] = 'none' -# Pick which set of results to plot - CD4 or Monocytes -# base_dir = '../../CD4_w_protein' base_dir = '../../Mono_w_protein' datasets = [ @@ -180,6 +178,4 @@ def eval_num_assigned(data): plt.grid(color='#CCCCCC', lw=0.5, axis='y', ls=(0, (5, 5))) plt.subplots_adjust(bottom=.25, wspace=0.4, left=0.1, right=0.9) -# plt.savefig('CD4_Module_TrainTest.svg') plt.savefig('Monocyte_Module_TrainTest.svg') -# plt.show() diff --git a/Transcriptomics/Figures/EvaluateFeatureSelection/hvg_vs_hs.py b/Transcriptomics/Figures/EvaluateFeatureSelection/hvg_vs_hs.py index 8484f66..7f87328 100644 --- a/Transcriptomics/Figures/EvaluateFeatureSelection/hvg_vs_hs.py +++ b/Transcriptomics/Figures/EvaluateFeatureSelection/hvg_vs_hs.py @@ -14,30 +14,6 @@ hvg = hvg.loc[hs.index] -# %% Plot one 
vs other - -# plot_data = hs[['Symbol', 'Z']].join( -# hvg[['gene.dispersion.scaled', 'gene.mean']] -# ) -# -# plt.figure(figsize=(5, 5)) -# -# plt.plot( -# plot_data['gene.dispersion.scaled'], -# plot_data['Z'], -# 'o', ms=2, rasterized=True -# ) -# -# plt.gca().set_axisbelow(True) -# plt.xlabel('Scaled Dispersion') -# plt.ylabel('Autocorrelation Z') -# plt.grid(color='#BBBBBB', ls=(0, (5, 5)), lw=.5) -# plt.subplots_adjust(left=0.15, right=1-0.15, bottom=0.15, top=1-0.15) -# -# plt.show() -# plt.savefig('autocorrelation_vs_hvg.svg', dpi=300) - - # %% loom_file = "../../../data/10x_PBMC_w_proteins/cd4/data.loom"