v2.0.26

Add report display name, store output file names without their folder paths, fix sgRNA plot, add CRISPRessoPooled HTML report, add citation to report
pinellolab · Mar 6, 2019 · 768c75c
1 parent 58257b5
commit 768c75c
Show file tree

Hide file tree

Showing 12 changed files with 197 additions and 100 deletions.
diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -335,13 +335,13 @@ def report_nucleotide_summary(amplicon_seq,amplicon_name,amplicon_index):
                     info("Skipping the amplicon '%s' in folder '%s'. Cannot find nucleotide information."%(batch_amplicon_name,folder_name))
                     continue
 
-                nucleotide_frequency_file = run_data['refs'][batch_amplicon_name]['nuc_freq_filename']
+                nucleotide_frequency_file = os.path.join(folder_name,run_data['refs'][batch_amplicon_name]['nuc_freq_filename'])
                 ampSeq_nf,nuc_freqs = CRISPRessoShared.parse_count_file(nucleotide_frequency_file)
 
-                nucleotide_pct_file=run_data['refs'][batch_amplicon_name]['nuc_pct_filename']
+                nucleotide_pct_file = os.path.join(folder_name,run_data['refs'][batch_amplicon_name]['nuc_pct_filename'])
                 ampSeq_np,nuc_pcts = CRISPRessoShared.parse_count_file(nucleotide_pct_file)
 
-                count_file=run_data['refs'][batch_amplicon_name]['mod_count_filename']
+                count_file = os.path.join(folder_name,run_data['refs'][batch_amplicon_name]['mod_count_filename'])
                 ampSeq_cf,mod_freqs = CRISPRessoShared.parse_count_file(count_file)
 
                 if ampSeq_nf is None or ampSeq_np is None or ampSeq_cf is None:
@@ -491,7 +491,7 @@ def report_nucleotide_summary(amplicon_seq,amplicon_name,amplicon_index):
                 if run_data is None:
                     continue
 
-                amplicon_modification_file=run_data['quant_of_editing_freq_filename']
+                amplicon_modification_file=os.path.join(folder_name,run_data['quant_of_editing_freq_filename'])
                 with open(amplicon_modification_file,'r') as infile:
                     file_head = infile.readline()
                     if not wrote_header:
@@ -510,7 +510,7 @@ def report_nucleotide_summary(amplicon_seq,amplicon_name,amplicon_index):
                 run_data = run_datas[idx]
                 if run_data is None:
                     continue
-                amplicon_modification_file=run_data['mapping_stats_filename']
+                amplicon_modification_file=os.path.join(folder_name,run_data['mapping_stats_filename'])
                 with open(amplicon_modification_file,'r') as infile:
                     file_head = infile.readline()
                     if not wrote_header:

diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -42,7 +42,7 @@ def get_amplicon_output(amplicon_name,output_folder):
     if os.path.exists(quantification_file) and profile_file:
         return quantification_file,profile_file
     else:
-        raise CRISPRessoShared.OutputFolderIncompleteException('The folder %s  is not a valid CRISPResso2 output folder. Cannot find profile file %s for amplicon %s.' % (output_folder,profile_file,amplicon_name))
+        raise CRISPRessoShared.OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find profile file %s for amplicon %s.' % (output_folder,profile_file,amplicon_name))
 
 def parse_profile(profile_file):
     return np.loadtxt(profile_file,skiprows=1)
@@ -77,10 +77,7 @@ class DifferentAmpliconLengthException(Exception):
 
 
 matplotlib=check_library('matplotlib')
-from matplotlib import font_manager as fm
-font = {'size'   : 20}
-matplotlib.rc('font', **font)
-matplotlib.use('Agg')
+CRISPRessoPlot.setMatplotlibDefaults()
 
 plt=check_library('pylab')
 np=check_library('numpy')

diff --git a/CRISPResso2/CRISPRessoPlot.py b/CRISPResso2/CRISPRessoPlot.py
@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 import matplotlib
-matplotlib.use('Agg')
+matplotlib.use('AGG')
 import matplotlib.pyplot as plt
 import matplotlib.patches as patches
 import matplotlib.cm as cm
@@ -26,7 +26,7 @@
 def setMatplotlibDefaults():
     font = {'size'   : 22}
     matplotlib.rc('font', **font)
-    matplotlib.use('Agg')
+    matplotlib.use('AGG')
     matplotlib.rcParams['pdf.fonttype'] = 42
     matplotlib.rcParams['ps.fonttype'] = 42
     matplotlib.rcParams["font.sans-serif"] = ["Arial", "Liberation Sans", "Bitstream Vera Sans"]
@@ -88,7 +88,6 @@ def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png=
     min_text_pct: add text annotation if the percent is greater than this number
     max_text_pct: add text annotation if the percent is less than this number
     """
-
     plotPct = 0.9 #percent of vertical space to plot in (the rest will be white)
     min_plot_pct = 0.01 #if value is less than this, it won't plot the rectangle (with white boundary)
 
@@ -185,7 +184,6 @@ def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png=
 #    sampleReadCounts = list(nuc_pct_df.iloc[[((nSamples-1)-x)*nNucs for x in range(0,nSamples)],0]))
     ax.set_yticklabels(['Reference'] + list(nuc_pct_df.iloc[[((nSamples-1)-x)*nNucs for x in range(0,nSamples)],0]))
 
-
     plot_y_start = ref_y_start
 
     if sgRNA_intervals:
@@ -194,19 +192,27 @@ def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png=
         sgRNA_y_height = 0.2
         min_sgRNA_x = None
         for idx,sgRNA_int in enumerate(sgRNA_intervals):
-            this_sgRNA_start = sgRNA_int[0]
-            this_sgRNA_end = sgRNA_int[1]
-#            print('this sgRNA_start is ' + str(this_sgRNA_start))
-#            print('this sgRNA_end is ' + str(this_sgRNA_end))
-#            print(nuc_pct_df)
+            this_sgRNA_start = max(0,sgRNA_int[0])
+            this_sgRNA_end = min(sgRNA_int[1],amp_len - 1)
             ax.add_patch(
-                patches.Rectangle((2+sgRNA_int[0], sgRNA_y_start), 1+sgRNA_int[1]-sgRNA_int[0], sgRNA_y_height,facecolor=(0,0,0,0.15))
+                patches.Rectangle((2+this_sgRNA_start, sgRNA_y_start), 1+this_sgRNA_end-this_sgRNA_start, sgRNA_y_height,facecolor=(0,0,0,0.15))
                 )
+
+            #if plot has trimmed the sgRNA, add a mark
+            if this_sgRNA_start != sgRNA_int[0]:
+                ax.add_patch(
+                    patches.Rectangle((2.1+this_sgRNA_start, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
+                    )
+            if this_sgRNA_end != sgRNA_int[1]:
+                ax.add_patch(
+                    patches.Rectangle((2.8+this_sgRNA_end, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
+                    )
+
             #set left-most sgrna start
             if not min_sgRNA_x:
-                min_sgRNA_x = sgRNA_int[0]
-            if sgRNA_int[0] < min_sgRNA_x:
-                min_sgRNA_x = sgRNA_int[0]
+                min_sgRNA_x = this_sgRNA_start
+            if this_sgRNA_start < min_sgRNA_x:
+                min_sgRNA_x = this_sgRNA_start
         ax.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')
 
     if quantification_window_idxs is not None:
@@ -394,14 +400,27 @@ def plot_conversion_map(nuc_pct_df,fig_filename_root,conversion_nuc_from,convers
         sgRNA_y_height = 0.1
         min_sgRNA_x = None
         for idx,sgRNA_int in enumerate(sgRNA_intervals):
+            this_sgRNA_start = max(0,sgRNA_int[0])
+            this_sgRNA_end = min(sgRNA_int[1],amp_len - 1)
             ax.add_patch(
-                patches.Rectangle((2+sgRNA_int[0], sgRNA_y_start), 1+sgRNA_int[1]-sgRNA_int[0], sgRNA_y_height,facecolor=(0,0,0,0.15))
+                patches.Rectangle((2+this_sgRNA_start, sgRNA_y_start), 1+this_sgRNA_end-this_sgRNA_start, sgRNA_y_height,facecolor=(0,0,0,0.15))
                 )
+
+            #if plot has trimmed the sgRNA, add a mark
+            if this_sgRNA_start != sgRNA_int[0]:
+                ax.add_patch(
+                    patches.Rectangle((2.1+this_sgRNA_start, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
+                    )
+            if this_sgRNA_end != sgRNA_int[1]:
+                ax.add_patch(
+                    patches.Rectangle((2.8+this_sgRNA_end, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
+                    )
+
             #set left-most sgrna start
             if not min_sgRNA_x:
-                min_sgRNA_x = sgRNA_int[0]
-            if sgRNA_int[0] < min_sgRNA_x:
-                min_sgRNA_x = sgRNA_int[0]
+                min_sgRNA_x = this_sgRNA_start
+            if this_sgRNA_start < min_sgRNA_x:
+                min_sgRNA_x = this_sgRNA_start
         ax.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')
 
     #legend
@@ -1026,19 +1045,21 @@ def plot_alleles_heatmap(reference_seq,fig_filename_root,X,annot,y_labels,insert
 
 # todo -- add sgRNAs below reference plot
 #    if sgRNA_intervals:
+#        ax_hm_anno=plt.subplot(gs3[2, :])
 #        sgRNA_y_start = 0.3
-#        sgRNA_y_height = 0.1
+##        sgRNA_y_height = 0.1
+#        sgRNA_y_height = 10
 #        min_sgRNA_x = None
 #        for idx,sgRNA_int in enumerate(sgRNA_intervals):
-#            ax_hm_ref.add_patch(
+#            ax_hm_anno.add_patch(
 #                patches.Rectangle((2+sgRNA_int[0], sgRNA_y_start), 1+sgRNA_int[1]-sgRNA_int[0], sgRNA_y_height,facecolor=(0,0,0,0.15))
 #                )
 #            #set left-most sgrna start
 #            if not min_sgRNA_x:
 #                min_sgRNA_x = sgRNA_int[0]
 #            if sgRNA_int[0] < min_sgRNA_x:
 #                min_sgRNA_x = sgRNA_int[0]
-#        ax_hm_ref.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')
+#        ax_hm_anno.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')
 
     #print lines
 

diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py
@@ -17,6 +17,7 @@
 import re
 from CRISPResso2 import CRISPRessoShared
 from CRISPResso2 import CRISPRessoMultiProcessing
+from CRISPResso2 import CRISPRessoReport
 import traceback
 
 import logging
@@ -323,7 +324,7 @@ def main():
         logging.getLogger().addHandler(logging.FileHandler(log_filename))
 
         with open(log_filename,'w+') as outfile:
-                  outfile.write('[Command used]:\nCRISPRessoPooled %s\n\n[Execution log]:\n' % ' '.join(sys.argv))
+                  outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))
 
         if args.fastq_r2=='': #single end reads
 
@@ -908,6 +909,11 @@ def default_sigpipe():
                          warn('Skipping:%s' %file_to_remove)
 
 
+        if not args.suppress_report:
+            report_name = _jp('CRISPResso2Pooled_report.html')
+            CRISPRessoReport.make_pooled_report_from_folder(report_name,OUTPUT_DIRECTORY,_ROOT)
+
+
         info('All Done!')
         print CRISPRessoShared.get_crispresso_footer()
         sys.exit(0)

diff --git a/CRISPResso2/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReport.py
@@ -105,10 +105,13 @@ def add_fig_if_exists(fig_name,fig_root,fig_title,fig_caption,
         fig_titles[amplicon_name] = amplicon_fig_titles
         fig_captions[amplicon_name] = amplicon_fig_captions
 
+    report_display_name = ""
+    if run_data['args'].name != "":
+        report_display_name = run_data['args'].name
 
     report_data={'amplicons':amplicons,'fig_names':fig_names,'fig_2b_names':fig_2b_names,'fig_9_names':fig_9_names,
             'fig_locs':fig_locs,'fig_titles':fig_titles,'fig_captions':fig_captions,'run_data':run_data,
-            'command_used':run_data['command_used'],'params':run_data['args_string']}
+            'command_used':run_data['command_used'],'params':run_data['args_string'],'report_display_name':report_display_name}
 
 
     j2_env = Environment(loader=FileSystemLoader(os.path.join(_ROOT,'templates')))
@@ -132,31 +135,76 @@ def make_batch_report_from_folder(crispressoBatch_report_file,batch_folder,_ROOT
 
 
     sub_folders = [x for x in all_files if x.startswith('CRISPResso_on_')]
-    sub_html_files = []
+    run_names = []
+    sub_html_files = {}
     for sub_folder in sub_folders:
         info_file = os.path.join(batch_folder,sub_folder,'CRISPResso2_info.pickle')
         if not os.path.exists(info_file):
             raise Exception('CRISPResso run %s is not complete. Cannot add to batch report.'% sub_folder)
         run_data = cp.load(open(info_file,'rb'))
         if not 'report_filename' in run_data:
             raise Exception('CRISPResso run %s has no report. Cannot add to batch report.'% sub_folder)
-        sub_html_files.append(os.path.join(sub_folder,os.path.basename(run_data['report_filename'])))
+        run_name = run_data['args'].name
+        run_names.append(run_name)
+        sub_html_files[run_name] = os.path.join(sub_folder,os.path.basename(run_data['report_filename']))
 
-    make_batch_report(window_nuc_pct_quilts,nuc_pct_quilts,window_nuc_conv_plots,nuc_conv_plots,sub_html_files,crispressoBatch_report_file,batch_folder,_ROOT)
+    make_multi_report(run_names,sub_html_files,crispressoBatch_report_file,_ROOT,'CRISPResso Batch',
+        window_nuc_pct_quilts=window_nuc_pct_quilts,
+        nuc_pct_quilts=nuc_pct_quilts,
+        window_nuc_conv_plots=window_nuc_conv_plots,
+        nuc_conv_plots=nuc_conv_plots)
 
-def make_batch_report(window_nuc_pct_quilts,nuc_pct_quilts,window_nuc_conv_plots,nuc_conv_plots,sub_html_files,crispressoBatch_report_file,batch_folder,_ROOT):
+def make_pooled_report_from_folder(crispressoPooled_report_file,pooled_folder,_ROOT):
+    all_files = os.listdir(pooled_folder)
+
+    sub_folders = [x for x in all_files if x.startswith('CRISPResso_on_')]
+    run_names = []
+    sub_html_files = {}
+
+    sub_2a_labels = {}
+    sub_2a_pdfs = {}
+
+    for sub_folder in sub_folders:
+        info_file = os.path.join(pooled_folder,sub_folder,'CRISPResso2_info.pickle')
+        if not os.path.exists(info_file):
+            raise Exception('CRISPResso run %s is not complete. Cannot add to pooled report.'% sub_folder)
+        run_data = cp.load(open(info_file,'rb'))
+        if not 'report_filename' in run_data:
+            raise Exception('CRISPResso run %s has no report. Cannot add to pooled report.'% sub_folder)
+
+        run_name = run_data['args'].name
+        run_names.append(run_name)
+        sub_html_files[run_name] = os.path.join(sub_folder,os.path.basename(run_data['report_filename']))
+
+        this_sub_2a_labels = []
+        this_sub_2a_pdfs = []
+        for ref_name in run_data['ref_names']:
+            this_sub_2a_labels.append("Nucleotide distribution across " + ref_name)
+            this_sub_2a_pdfs.append(run_data['refs'][ref_name]['plot_2a_root']+".pdf")
+
+        sub_2a_labels[run_name] = this_sub_2a_labels
+        sub_2a_pdfs[run_name] = this_sub_2a_pdfs
+
+    make_multi_report(run_names,sub_html_files,crispressoPooled_report_file,_ROOT,'CRISPResso Pooled')
+
+def make_multi_report(run_names,sub_html_files,crispresso_multi_report_file,_ROOT,crispresso_mode,
+    window_nuc_pct_quilts=[],
+    nuc_pct_quilts=[],
+    window_nuc_conv_plots=[],
+    nuc_conv_plots=[]
+):
 
         def dirname(path):
             return os.path.basename(os.path.dirname(path))
         j2_env = Environment(loader=FileSystemLoader(os.path.join(_ROOT,'templates')))
         j2_env.filters['dirname'] = dirname
-        template = j2_env.get_template('batchReport.html')
+        template = j2_env.get_template('multiReport.html')
 
-        dest_dir = os.path.dirname(crispressoBatch_report_file)
+        dest_dir = os.path.dirname(crispresso_multi_report_file)
         shutil.copy2(os.path.join(_ROOT,'templates','CRISPResso_justcup.png'),dest_dir)
         shutil.copy2(os.path.join(_ROOT,'templates','favicon.ico'),dest_dir)
 
-        outfile = open(crispressoBatch_report_file,'w')
+        outfile = open(crispresso_multi_report_file,'w')
         outfile.write(template.render(window_nuc_pct_quilts=window_nuc_pct_quilts,nuc_pct_quilts=nuc_pct_quilts,
-            window_nuc_conv_plots=window_nuc_conv_plots,nuc_conv_plots=nuc_conv_plots,sub_html_files=sub_html_files))
+            window_nuc_conv_plots=window_nuc_conv_plots,nuc_conv_plots=nuc_conv_plots,run_names=run_names,sub_html_files=sub_html_files,crispresso_mode=crispresso_mode))
         outfile.close()
diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py
@@ -29,7 +29,7 @@
 else:
     import cPickle as cp #python 2.7
 
-__version__ = "2.0.25"
+__version__ = "2.0.26"
 
 ###EXCEPTIONS############################
 class FlashException(Exception):
@@ -323,7 +323,7 @@ def check_output_folder(output_folder):
     amplicon_info = {}
     amplicons = run_data['ref_names']
 
-    quantification_file=run_data['quant_of_editing_freq_filename']
+    quantification_file=os.path.join(output_folder,run_data['quant_of_editing_freq_filename'])
     if os.path.exists(quantification_file):
         with open(quantification_file) as quant_file:
             head_line = quant_file.readline()
@@ -332,17 +332,17 @@ def check_output_folder(output_folder):
                 line_els = line.split("\t")
                 amplicon_name = line_els[0]
                 amplicon_info[amplicon_name] = {}
-                amplicon_quant_file = run_data['refs'][amplicon_name]['combined_pct_vector_filename']
+                amplicon_quant_file = os.path.join(output_folder,run_data['refs'][amplicon_name]['combined_pct_vector_filename'])
                 if not os.path.exists(amplicon_quant_file):
-                    raise OutputFolderIncompleteException('The folder %s  is not a valid CRISPResso2 output folder. Cannot find quantification file %s for amplicon %s.' % (output_folder,amplicon_quant_file,amplicon_name))
+                    raise OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find quantification file %s for amplicon %s.' % (output_folder,amplicon_quant_file,amplicon_name))
                 amplicon_info[amplicon_name]['quantification_file'] = amplicon_quant_file
 
-                amplicon_mod_count_file = run_data['refs'][amplicon_name]['quant_window_mod_count_filename']
+                amplicon_mod_count_file = os.path.join(output_folder,run_data['refs'][amplicon_name]['quant_window_mod_count_filename'])
                 if not os.path.exists(amplicon_mod_count_file):
                     raise OutputFolderIncompleteException('The folder %s  is not a valid CRISPResso2 output folder. Cannot find modification count vector file %s for amplicon %s.' % (output_folder,amplicon_mod_count_file,amplicon_name))
                 amplicon_info[amplicon_name]['modification_count_file'] = amplicon_mod_count_file
 
-                amplicon_info[amplicon_name]['allele_files'] = run_data['refs'][amplicon_name]['allele_frequency_files']
+                amplicon_info[amplicon_name]['allele_files'] = [os.path.join(output_folder,x) for x in run_data['refs'][amplicon_name]['allele_frequency_files']]
 
                 for idx,el in enumerate(head_line_els):
                     amplicon_info[amplicon_name][el] = line_els[idx]
@@ -747,7 +747,7 @@ def get_crispresso_header(description,header_str):
         for i in range(len(logo_lines))[::-1]:
             output_line = (pad_string + logo_lines[i].ljust(max_logo_width) + pad_string).center(term_width) + "\n" + output_line
 
-    output_line += '\n'+('[CRISPresso version ' + __version__ + ']').center(term_width) + '\n' + ('[Kendell Clement and Luca Pinello 2018]').center(term_width) + "\n" + ('[For support contact kclement@mgh.harvard.edu]').center(term_width) + "\n"
+    output_line += '\n'+('[CRISPresso version ' + __version__ + ']').center(term_width) + '\n' + ('[Kendell Clement and Luca Pinello 2019]').center(term_width) + "\n" + ('[For support contact kclement@mgh.harvard.edu]').center(term_width) + "\n"
 
     description_str = ""
     for str in description:

diff --git a/CRISPResso2/CRISPRessoWGSCORE.py b/CRISPResso2/CRISPRessoWGSCORE.py
@@ -294,7 +294,7 @@ def print_stacktrace_if_debug():
         logging.getLogger().addHandler(logging.FileHandler(log_filename))
 
         with open(log_filename,'w+') as outfile:
-                  outfile.write('[Command used]:\nCRISPRessoWGS %s\n\n[Execution log]:\n' % ' '.join(sys.argv))
+                  outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))
 
         #check if bam has the index already
         if os.path.exists(args.bam_file+'.bai'):