Fix prime editing indexing for plots

pinellolab · Feb 9, 2022 · ab72489 · ab72489
1 parent 442a48c
commit ab72489
Showing 1 changed file with 20 additions and 6 deletions.
diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
@@ -1117,6 +1117,8 @@ def rreplace(s, old, new):
             #we're going to consider the first reference only (so if multiple alleles exist at the editing position, this may get messy)
             best_aln_seq, best_aln_score, best_aln_mismatches, best_aln_start, best_aln_end, s1, s2 = CRISPRessoShared.get_best_aln_pos_and_mismatches(prime_editing_extension_seq_dna, amplicon_seq_arr[0],aln_matrix,args.needleman_wunsch_gap_open,0)
             new_ref = s2[0:best_aln_start] + prime_editing_extension_seq_dna + s2[best_aln_end:]
+            if args.debug:
+                info('Alignment between extension sequence and reference sequence: \n' + s1 + '\n' + s2)
 
             if args.prime_editing_override_prime_edited_ref_seq != "":
                 if args.debug:
@@ -4371,10 +4373,22 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
                 crispresso2_info['results']['refs'][ref_names_for_pe[0]]['plot_11b_roots'] = []
                 crispresso2_info['results']['refs'][ref_names_for_pe[0]]['plot_11b_captions'] = []
                 crispresso2_info['results']['refs'][ref_names_for_pe[0]]['plot_11b_datas'] = []
-                for i in range(len(cut_points)):
-                    cut_point = cut_points[i]
-                    sgRNA = sgRNA_orig_sequences[i]
-                    sgRNA_name = sgRNA_names[i]
+
+                pe_sgRNA_sequences = refs[ref_names_for_pe[0]]['sgRNA_sequences']
+                pe_sgRNA_orig_sequences = refs[ref_names_for_pe[0]]['sgRNA_orig_sequences']
+                pe_sgRNA_cut_points = refs[ref_names_for_pe[0]]['sgRNA_cut_points']
+                pe_sgRNA_plot_cut_points = refs[ref_names_for_pe[0]]['sgRNA_plot_cut_points']
+                pe_sgRNA_intervals = refs[ref_names_for_pe[0]]['sgRNA_intervals']
+                pe_sgRNA_names = refs[ref_names_for_pe[0]]['sgRNA_names']
+                pe_sgRNA_plot_idxs = refs[ref_names_for_pe[0]]['sgRNA_plot_idxs']
+                pe_sgRNA_mismatches = refs[ref_names_for_pe[0]]['sgRNA_mismatches']
+                pe_ref_len = refs[ref_names_for_pe[0]]['sequence_length']
+                pe_include_idxs_list = refs[ref_names_for_pe[0]]['include_idxs']
+
+                for i in range(len(pe_sgRNA_cut_points)):
+                    cut_point = pe_sgRNA_cut_points[i]
+                    sgRNA = pe_sgRNA_orig_sequences[i]
+                    sgRNA_name = pe_sgRNA_names[i]
 
                     sgRNA_label = "sgRNA_"+sgRNA # for file names
                     sgRNA_legend = "sgRNA " + sgRNA # for legends
@@ -4387,15 +4401,15 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
                     sel_cols = [0, 1]
                     plot_half_window = max(1, args.plot_window_size)
                     new_sel_cols_start = max(2, cut_point-plot_half_window+1)
-                    new_sel_cols_end = min(ref_len, cut_point+plot_half_window+1)
+                    new_sel_cols_end = min(pe_ref_len, cut_point+plot_half_window+1)
                     sel_cols.extend(list(range(new_sel_cols_start+2, new_sel_cols_end+2))) #+2 because the first two columns are Batch and Nucleotide
                     #get new intervals
                     new_sgRNA_intervals = []
                     #add annotations for each sgRNA (to be plotted on this sgRNA's plot)
                     for (int_start, int_end) in refs[ref_names_for_pe[0]]['sgRNA_intervals']:
                         new_sgRNA_intervals += [(int_start - new_sel_cols_start, int_end - new_sel_cols_start)]
                     new_include_idx = []
-                    for x in include_idxs_list:
+                    for x in pe_include_idxs_list:
                         new_include_idx += [x - new_sel_cols_start]
                     plot_root = _jp('11b.Nucleotide_percentage_quilt_around_' + sgRNA_label)
                     plot_11b_input = {