Skip to content

Commit

Permalink
v2.0.26
Browse files Browse the repository at this point in the history
add report display name, remove paths from stored files, fix sgRNA plot, add CRISPRessoPooled HTML report, add citation to report
  • Loading branch information
kclem committed Mar 6, 2019
1 parent 58257b5 commit 768c75c
Show file tree
Hide file tree
Showing 12 changed files with 197 additions and 100 deletions.
10 changes: 5 additions & 5 deletions CRISPResso2/CRISPRessoBatchCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,13 +335,13 @@ def report_nucleotide_summary(amplicon_seq,amplicon_name,amplicon_index):
info("Skipping the amplicon '%s' in folder '%s'. Cannot find nucleotide information."%(batch_amplicon_name,folder_name))
continue

nucleotide_frequency_file = run_data['refs'][batch_amplicon_name]['nuc_freq_filename']
nucleotide_frequency_file = os.path.join(folder_name,run_data['refs'][batch_amplicon_name]['nuc_freq_filename'])
ampSeq_nf,nuc_freqs = CRISPRessoShared.parse_count_file(nucleotide_frequency_file)

nucleotide_pct_file=run_data['refs'][batch_amplicon_name]['nuc_pct_filename']
nucleotide_pct_file = os.path.join(folder_name,run_data['refs'][batch_amplicon_name]['nuc_pct_filename'])
ampSeq_np,nuc_pcts = CRISPRessoShared.parse_count_file(nucleotide_pct_file)

count_file=run_data['refs'][batch_amplicon_name]['mod_count_filename']
count_file = os.path.join(folder_name,run_data['refs'][batch_amplicon_name]['mod_count_filename'])
ampSeq_cf,mod_freqs = CRISPRessoShared.parse_count_file(count_file)

if ampSeq_nf is None or ampSeq_np is None or ampSeq_cf is None:
Expand Down Expand Up @@ -491,7 +491,7 @@ def report_nucleotide_summary(amplicon_seq,amplicon_name,amplicon_index):
if run_data is None:
continue

amplicon_modification_file=run_data['quant_of_editing_freq_filename']
amplicon_modification_file=os.path.join(folder_name,run_data['quant_of_editing_freq_filename'])
with open(amplicon_modification_file,'r') as infile:
file_head = infile.readline()
if not wrote_header:
Expand All @@ -510,7 +510,7 @@ def report_nucleotide_summary(amplicon_seq,amplicon_name,amplicon_index):
run_data = run_datas[idx]
if run_data is None:
continue
amplicon_modification_file=run_data['mapping_stats_filename']
amplicon_modification_file=os.path.join(folder_name,run_data['mapping_stats_filename'])
with open(amplicon_modification_file,'r') as infile:
file_head = infile.readline()
if not wrote_header:
Expand Down
77 changes: 41 additions & 36 deletions CRISPResso2/CRISPRessoCORE.py

Large diffs are not rendered by default.

7 changes: 2 additions & 5 deletions CRISPResso2/CRISPRessoCompareCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def get_amplicon_output(amplicon_name,output_folder):
if os.path.exists(quantification_file) and profile_file:
return quantification_file,profile_file
else:
raise CRISPRessoShared.OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find profile file %s for amplicon %s.' % (output_folder,profile_file,amplicon_name))
raise CRISPRessoShared.OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find profile file %s for amplicon %s.' % (output_folder,profile_file,amplicon_name))

def parse_profile(profile_file):
    """
    Read a whitespace-delimited numeric table with numpy, ignoring its first row.

    profile_file: file name or file-like object accepted by np.loadtxt
    returns: the numpy array produced by np.loadtxt
    """
    rows_to_skip = 1
    profile = np.loadtxt(profile_file, skiprows=rows_to_skip)
    return profile
Expand Down Expand Up @@ -77,10 +77,7 @@ class DifferentAmpliconLengthException(Exception):


matplotlib=check_library('matplotlib')
from matplotlib import font_manager as fm
font = {'size' : 20}
matplotlib.rc('font', **font)
matplotlib.use('Agg')
CRISPRessoPlot.setMatplotlibDefaults()

plt=check_library('pylab')
np=check_library('numpy')
Expand Down
61 changes: 41 additions & 20 deletions CRISPResso2/CRISPRessoPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
matplotlib.use('AGG')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.cm as cm
Expand All @@ -26,7 +26,7 @@
def setMatplotlibDefaults():
font = {'size' : 22}
matplotlib.rc('font', **font)
matplotlib.use('Agg')
matplotlib.use('AGG')
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
matplotlib.rcParams["font.sans-serif"] = ["Arial", "Liberation Sans", "Bitstream Vera Sans"]
Expand Down Expand Up @@ -88,7 +88,6 @@ def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png=
min_text_pct: add text annotation if the percent is greater than this number
max_text_pct: add text annotation if the percent is less than this number
"""

plotPct = 0.9 #percent of vertical space to plot in (the rest will be white)
min_plot_pct = 0.01 #if value is less than this, it won't plot the rectangle (with white boundary)

Expand Down Expand Up @@ -185,7 +184,6 @@ def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png=
# sampleReadCounts = list(nuc_pct_df.iloc[[((nSamples-1)-x)*nNucs for x in range(0,nSamples)],0]))
ax.set_yticklabels(['Reference'] + list(nuc_pct_df.iloc[[((nSamples-1)-x)*nNucs for x in range(0,nSamples)],0]))


plot_y_start = ref_y_start

if sgRNA_intervals:
Expand All @@ -194,19 +192,27 @@ def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png=
sgRNA_y_height = 0.2
min_sgRNA_x = None
for idx,sgRNA_int in enumerate(sgRNA_intervals):
this_sgRNA_start = sgRNA_int[0]
this_sgRNA_end = sgRNA_int[1]
# print('this sgRNA_start is ' + str(this_sgRNA_start))
# print('this sgRNA_end is ' + str(this_sgRNA_end))
# print(nuc_pct_df)
this_sgRNA_start = max(0,sgRNA_int[0])
this_sgRNA_end = min(sgRNA_int[1],amp_len - 1)
ax.add_patch(
patches.Rectangle((2+sgRNA_int[0], sgRNA_y_start), 1+sgRNA_int[1]-sgRNA_int[0], sgRNA_y_height,facecolor=(0,0,0,0.15))
patches.Rectangle((2+this_sgRNA_start, sgRNA_y_start), 1+this_sgRNA_end-this_sgRNA_start, sgRNA_y_height,facecolor=(0,0,0,0.15))
)

#if plot has trimmed the sgRNA, add a mark
if this_sgRNA_start != sgRNA_int[0]:
ax.add_patch(
patches.Rectangle((2.1+this_sgRNA_start, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
)
if this_sgRNA_end != sgRNA_int[1]:
ax.add_patch(
patches.Rectangle((2.8+this_sgRNA_end, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
)

#set left-most sgrna start
if not min_sgRNA_x:
min_sgRNA_x = sgRNA_int[0]
if sgRNA_int[0] < min_sgRNA_x:
min_sgRNA_x = sgRNA_int[0]
min_sgRNA_x = this_sgRNA_start
if this_sgRNA_start < min_sgRNA_x:
min_sgRNA_x = this_sgRNA_start
ax.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')

if quantification_window_idxs is not None:
Expand Down Expand Up @@ -394,14 +400,27 @@ def plot_conversion_map(nuc_pct_df,fig_filename_root,conversion_nuc_from,convers
sgRNA_y_height = 0.1
min_sgRNA_x = None
for idx,sgRNA_int in enumerate(sgRNA_intervals):
this_sgRNA_start = max(0,sgRNA_int[0])
this_sgRNA_end = min(sgRNA_int[1],amp_len - 1)
ax.add_patch(
patches.Rectangle((2+sgRNA_int[0], sgRNA_y_start), 1+sgRNA_int[1]-sgRNA_int[0], sgRNA_y_height,facecolor=(0,0,0,0.15))
patches.Rectangle((2+this_sgRNA_start, sgRNA_y_start), 1+this_sgRNA_end-this_sgRNA_start, sgRNA_y_height,facecolor=(0,0,0,0.15))
)

#if plot has trimmed the sgRNA, add a mark
if this_sgRNA_start != sgRNA_int[0]:
ax.add_patch(
patches.Rectangle((2.1+this_sgRNA_start, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
)
if this_sgRNA_end != sgRNA_int[1]:
ax.add_patch(
patches.Rectangle((2.8+this_sgRNA_end, sgRNA_y_start), 0.1, sgRNA_y_height,facecolor='w')
)

#set left-most sgrna start
if not min_sgRNA_x:
min_sgRNA_x = sgRNA_int[0]
if sgRNA_int[0] < min_sgRNA_x:
min_sgRNA_x = sgRNA_int[0]
min_sgRNA_x = this_sgRNA_start
if this_sgRNA_start < min_sgRNA_x:
min_sgRNA_x = this_sgRNA_start
ax.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')

#legend
Expand Down Expand Up @@ -1026,19 +1045,21 @@ def plot_alleles_heatmap(reference_seq,fig_filename_root,X,annot,y_labels,insert

# todo -- add sgRNAs below reference plot
# if sgRNA_intervals:
# ax_hm_anno=plt.subplot(gs3[2, :])
# sgRNA_y_start = 0.3
# sgRNA_y_height = 0.1
## sgRNA_y_height = 0.1
# sgRNA_y_height = 10
# min_sgRNA_x = None
# for idx,sgRNA_int in enumerate(sgRNA_intervals):
# ax_hm_ref.add_patch(
# ax_hm_anno.add_patch(
# patches.Rectangle((2+sgRNA_int[0], sgRNA_y_start), 1+sgRNA_int[1]-sgRNA_int[0], sgRNA_y_height,facecolor=(0,0,0,0.15))
# )
# #set left-most sgrna start
# if not min_sgRNA_x:
# min_sgRNA_x = sgRNA_int[0]
# if sgRNA_int[0] < min_sgRNA_x:
# min_sgRNA_x = sgRNA_int[0]
# ax_hm_ref.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')
# ax_hm_anno.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center')

#print lines

Expand Down
8 changes: 7 additions & 1 deletion CRISPResso2/CRISPRessoPooledCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import re
from CRISPResso2 import CRISPRessoShared
from CRISPResso2 import CRISPRessoMultiProcessing
from CRISPResso2 import CRISPRessoReport
import traceback

import logging
Expand Down Expand Up @@ -323,7 +324,7 @@ def main():
logging.getLogger().addHandler(logging.FileHandler(log_filename))

with open(log_filename,'w+') as outfile:
outfile.write('[Command used]:\nCRISPRessoPooled %s\n\n[Execution log]:\n' % ' '.join(sys.argv))
outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))

if args.fastq_r2=='': #single end reads

Expand Down Expand Up @@ -908,6 +909,11 @@ def default_sigpipe():
warn('Skipping:%s' %file_to_remove)


if not args.suppress_report:
report_name = _jp('CRISPResso2Pooled_report.html')
CRISPRessoReport.make_pooled_report_from_folder(report_name,OUTPUT_DIRECTORY,_ROOT)


info('All Done!')
print CRISPRessoShared.get_crispresso_footer()
sys.exit(0)
Expand Down
66 changes: 57 additions & 9 deletions CRISPResso2/CRISPRessoReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,13 @@ def add_fig_if_exists(fig_name,fig_root,fig_title,fig_caption,
fig_titles[amplicon_name] = amplicon_fig_titles
fig_captions[amplicon_name] = amplicon_fig_captions

report_display_name = ""
if run_data['args'].name != "":
report_display_name = run_data['args'].name

report_data={'amplicons':amplicons,'fig_names':fig_names,'fig_2b_names':fig_2b_names,'fig_9_names':fig_9_names,
'fig_locs':fig_locs,'fig_titles':fig_titles,'fig_captions':fig_captions,'run_data':run_data,
'command_used':run_data['command_used'],'params':run_data['args_string']}
'command_used':run_data['command_used'],'params':run_data['args_string'],'report_display_name':report_display_name}


j2_env = Environment(loader=FileSystemLoader(os.path.join(_ROOT,'templates')))
Expand All @@ -132,31 +135,76 @@ def make_batch_report_from_folder(crispressoBatch_report_file,batch_folder,_ROOT


sub_folders = [x for x in all_files if x.startswith('CRISPResso_on_')]
sub_html_files = []
run_names = []
sub_html_files = {}
for sub_folder in sub_folders:
info_file = os.path.join(batch_folder,sub_folder,'CRISPResso2_info.pickle')
if not os.path.exists(info_file):
raise Exception('CRISPResso run %s is not complete. Cannot add to batch report.'% sub_folder)
run_data = cp.load(open(info_file,'rb'))
if not 'report_filename' in run_data:
raise Exception('CRISPResso run %s has no report. Cannot add to batch report.'% sub_folder)
sub_html_files.append(os.path.join(sub_folder,os.path.basename(run_data['report_filename'])))
run_name = run_data['args'].name
run_names.append(run_name)
sub_html_files[run_name] = os.path.join(sub_folder,os.path.basename(run_data['report_filename']))

make_batch_report(window_nuc_pct_quilts,nuc_pct_quilts,window_nuc_conv_plots,nuc_conv_plots,sub_html_files,crispressoBatch_report_file,batch_folder,_ROOT)
make_multi_report(run_names,sub_html_files,crispressoBatch_report_file,_ROOT,'CRISPResso Batch',
window_nuc_pct_quilts=window_nuc_pct_quilts,
nuc_pct_quilts=nuc_pct_quilts,
window_nuc_conv_plots=window_nuc_conv_plots,
nuc_conv_plots=nuc_conv_plots)

def make_batch_report(window_nuc_pct_quilts,nuc_pct_quilts,window_nuc_conv_plots,nuc_conv_plots,sub_html_files,crispressoBatch_report_file,batch_folder,_ROOT):
def make_pooled_report_from_folder(crispressoPooled_report_file,pooled_folder,_ROOT):
    """
    Write the CRISPRessoPooled summary report for a finished pooled output folder.

    Every entry of pooled_folder whose name starts with 'CRISPResso_on_' is treated as one
    CRISPResso sub-run. For each sub-run the 'CRISPResso2_info.pickle' file is loaded to get
    the run name and the file name of that run's html report; the collected names and links
    are then handed to make_multi_report.

    crispressoPooled_report_file: path of the html report to create
    pooled_folder: folder containing the 'CRISPResso_on_*' sub-run folders
    _ROOT: forwarded unchanged to make_multi_report

    raises: Exception if a sub-run has no info pickle or its info has no 'report_filename'
    """
    # os.listdir returns entries in arbitrary order; sort so the report lists runs reproducibly
    sub_folders = sorted(x for x in os.listdir(pooled_folder) if x.startswith('CRISPResso_on_'))

    run_names = []
    sub_html_files = {}
    for sub_folder in sub_folders:
        info_file = os.path.join(pooled_folder,sub_folder,'CRISPResso2_info.pickle')
        if not os.path.exists(info_file):
            raise Exception('CRISPResso run %s is not complete. Cannot add to pooled report.'% sub_folder)

        # NOTE(review): unpickling executes arbitrary code from the file -- only point this at
        # output folders produced by a trusted CRISPResso run.
        with open(info_file,'rb') as info_handle:
            run_data = cp.load(info_handle)

        if 'report_filename' not in run_data:
            raise Exception('CRISPResso run %s has no report. Cannot add to pooled report.'% sub_folder)

        run_name = run_data['args'].name
        run_names.append(run_name)
        # link relative to the pooled folder: <sub_folder>/<report file name>
        sub_html_files[run_name] = os.path.join(sub_folder,os.path.basename(run_data['report_filename']))

    make_multi_report(run_names,sub_html_files,crispressoPooled_report_file,_ROOT,'CRISPResso Pooled')

def make_multi_report(run_names,sub_html_files,crispresso_multi_report_file,_ROOT,crispresso_mode,
        window_nuc_pct_quilts=None,
        nuc_pct_quilts=None,
        window_nuc_conv_plots=None,
        nuc_conv_plots=None
        ):
    """
    Render the 'multiReport.html' template into a report that links several CRISPResso runs.

    run_names: list of run names, passed to the template
    sub_html_files: dict of run name -> path of that run's html report, passed to the template
    crispresso_multi_report_file: path of the html file to write
    _ROOT: directory that contains the 'templates' folder (template, logo and favicon are read from it)
    crispresso_mode: label passed to the template (callers use e.g. 'CRISPResso Batch', 'CRISPResso Pooled')
    window_nuc_pct_quilts, nuc_pct_quilts, window_nuc_conv_plots, nuc_conv_plots:
        optional plot lists passed to the template; None is treated as an empty list

    Side effects: copies 'CRISPResso_justcup.png' and 'favicon.ico' next to the report file.
    """
    def dirname(path):
        # template filter: name of the folder that directly contains path
        return os.path.basename(os.path.dirname(path))

    # None instead of [] as default so no list object is shared between calls
    window_nuc_pct_quilts = [] if window_nuc_pct_quilts is None else window_nuc_pct_quilts
    nuc_pct_quilts = [] if nuc_pct_quilts is None else nuc_pct_quilts
    window_nuc_conv_plots = [] if window_nuc_conv_plots is None else window_nuc_conv_plots
    nuc_conv_plots = [] if nuc_conv_plots is None else nuc_conv_plots

    j2_env = Environment(loader=FileSystemLoader(os.path.join(_ROOT,'templates')))
    j2_env.filters['dirname'] = dirname
    template = j2_env.get_template('multiReport.html')

    # a bare file name has an empty dirname; fall back to the current directory for the copies
    dest_dir = os.path.dirname(crispresso_multi_report_file) or os.curdir
    shutil.copy2(os.path.join(_ROOT,'templates','CRISPResso_justcup.png'),dest_dir)
    shutil.copy2(os.path.join(_ROOT,'templates','favicon.ico'),dest_dir)

    # render before opening the output so a template error does not leave a truncated report
    report_html = template.render(window_nuc_pct_quilts=window_nuc_pct_quilts,nuc_pct_quilts=nuc_pct_quilts,
        window_nuc_conv_plots=window_nuc_conv_plots,nuc_conv_plots=nuc_conv_plots,
        run_names=run_names,sub_html_files=sub_html_files,crispresso_mode=crispresso_mode)

    with open(crispresso_multi_report_file,'w') as outfile:
        outfile.write(report_html)
14 changes: 7 additions & 7 deletions CRISPResso2/CRISPRessoShared.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
else:
import cPickle as cp #python 2.7

__version__ = "2.0.25"
__version__ = "2.0.26"

###EXCEPTIONS############################
class FlashException(Exception):
Expand Down Expand Up @@ -323,7 +323,7 @@ def check_output_folder(output_folder):
amplicon_info = {}
amplicons = run_data['ref_names']

quantification_file=run_data['quant_of_editing_freq_filename']
quantification_file=os.path.join(output_folder,run_data['quant_of_editing_freq_filename'])
if os.path.exists(quantification_file):
with open(quantification_file) as quant_file:
head_line = quant_file.readline()
Expand All @@ -332,17 +332,17 @@ def check_output_folder(output_folder):
line_els = line.split("\t")
amplicon_name = line_els[0]
amplicon_info[amplicon_name] = {}
amplicon_quant_file = run_data['refs'][amplicon_name]['combined_pct_vector_filename']
amplicon_quant_file = os.path.join(output_folder,run_data['refs'][amplicon_name]['combined_pct_vector_filename'])
if not os.path.exists(amplicon_quant_file):
raise OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find quantification file %s for amplicon %s.' % (output_folder,amplicon_quant_file,amplicon_name))
raise OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find quantification file %s for amplicon %s.' % (output_folder,amplicon_quant_file,amplicon_name))
amplicon_info[amplicon_name]['quantification_file'] = amplicon_quant_file

amplicon_mod_count_file = run_data['refs'][amplicon_name]['quant_window_mod_count_filename']
amplicon_mod_count_file = os.path.join(output_folder,run_data['refs'][amplicon_name]['quant_window_mod_count_filename'])
if not os.path.exists(amplicon_mod_count_file):
raise OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find modification count vector file %s for amplicon %s.' % (output_folder,amplicon_mod_count_file,amplicon_name))
amplicon_info[amplicon_name]['modification_count_file'] = amplicon_mod_count_file

amplicon_info[amplicon_name]['allele_files'] = run_data['refs'][amplicon_name]['allele_frequency_files']
amplicon_info[amplicon_name]['allele_files'] = [os.path.join(output_folder,x) for x in run_data['refs'][amplicon_name]['allele_frequency_files']]

for idx,el in enumerate(head_line_els):
amplicon_info[amplicon_name][el] = line_els[idx]
Expand Down Expand Up @@ -747,7 +747,7 @@ def get_crispresso_header(description,header_str):
for i in range(len(logo_lines))[::-1]:
output_line = (pad_string + logo_lines[i].ljust(max_logo_width) + pad_string).center(term_width) + "\n" + output_line

output_line += '\n'+('[CRISPresso version ' + __version__ + ']').center(term_width) + '\n' + ('[Kendell Clement and Luca Pinello 2018]').center(term_width) + "\n" + ('[For support contact kclement@mgh.harvard.edu]').center(term_width) + "\n"
output_line += '\n'+('[CRISPresso version ' + __version__ + ']').center(term_width) + '\n' + ('[Kendell Clement and Luca Pinello 2019]').center(term_width) + "\n" + ('[For support contact kclement@mgh.harvard.edu]').center(term_width) + "\n"

description_str = ""
for str in description:
Expand Down
2 changes: 1 addition & 1 deletion CRISPResso2/CRISPRessoWGSCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def print_stacktrace_if_debug():
logging.getLogger().addHandler(logging.FileHandler(log_filename))

with open(log_filename,'w+') as outfile:
outfile.write('[Command used]:\nCRISPRessoWGS %s\n\n[Execution log]:\n' % ' '.join(sys.argv))
outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))

#check if bam has the index already
if os.path.exists(args.bam_file+'.bai'):
Expand Down

0 comments on commit 768c75c

Please sign in to comment.