-
Notifications
You must be signed in to change notification settings - Fork 0
/
Recombinastics_analysis.Rmd
2160 lines (1688 loc) · 141 KB
/
Recombinastics_analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "Recombinastics_analysis"
author: "Kenneth Matreyek and Nisha D. Kamath"
date: "initialized 6/17/2020 - last updated 9/22/2023"
output: github_document
---
```{r Set up the workspace}
# Workspace setup for the whole analysis: attach packages and set plot defaults.
#
# `rm(list = ls())` was removed from the top of this chunk. It only deletes
# user-created objects (attached packages, options and the working directory
# are left as they were), so it does not provide a clean session, and it wipes
# the workspace of anyone who runs this chunk interactively. Restart R or knit
# the document to get a clean session.
#
# The attach order is kept exactly as it was: packages attached later mask
# same-named functions from earlier ones, so reordering could change which
# function later chunks end up calling.
library(tidyverse)
library(ggrepel)
library(reshape)
library(abind)
library(gridExtra)
library(ggbeeswarm)
library(factoextra)
library(ggfortify)
library(patchwork)

# Default look for every plot: black-and-white theme without minor grid lines.
theme_set(theme_bw())
theme_update(panel.grid.minor = element_blank())

# Print the R version details into the rendered output.
R.Version()
```
## Look at the recombination method test data. Note: this is not part of the manuscript
```{r Testing recombination efficiency in two different plate formats, fig.height = 4, fig.width = 6}
# Recombination method test: per-condition mean and 95% confidence interval of
# the replicate measurements, plotted per method / Bxb1 condition and coloured
# by well size.
methods_data <- read.csv(file = "data/Recombination_method_tests.csv", header = TRUE)

# Retained from the original script; no longer used in this chunk.
# NOTE(review): presumably the file has five annotation columns followed only
# by the replicate columns -- confirm before relying on this value.
methods_data_replicates <- ncol(methods_data) - 5

replicate_cols <- c("Recombined_Rep1", "Recombined_Rep2", "Recombined_Rep3", "Recombined_Rep4")
replicate_values <- as.matrix(methods_data[, replicate_cols])
# Number of replicates actually measured in each row (NA = not measured).
replicates_observed <- rowSums(!is.na(replicate_values))

methods_data$mean <- rowMeans(replicate_values, na.rm = TRUE)
# Sample standard deviation per row. The previous code took
# sqrt(sum of squared deviations) without dividing by (n - 1) and then divided
# by sqrt(n - 1) to get "se", so the value stored as the standard error was
# really the sample SD and the confidence interval was too wide by sqrt(n).
methods_data$sd <- unname(apply(replicate_values, 1, sd, na.rm = TRUE))
# Standard error of the mean = SD / sqrt(n).
methods_data$se <- methods_data$sd / sqrt(replicates_observed)
# Normal-approximation 95% confidence interval.
methods_data$upper_conf <- methods_data$mean + methods_data$se * 1.96
methods_data$lower_conf <- methods_data$mean - methods_data$se * 1.96

# Drop rows without a first replicate and the "none" method, and build the
# two-line x-axis label.
methods_data2 <- methods_data %>%
  filter(!is.na(Recombined_Rep1) & Method != "none") %>%
  mutate(concat = paste0("Method: ", Method, "\n", "Bxb1: ", Bxb1))
methods_data2$WellSize <- factor(methods_data2$WellSize)
# Display order of the x-axis categories: rows 1, 3, 2, 4 of the filtered table.
methods_data2$concat <- factor(methods_data2$concat, levels = methods_data2$concat[c(1,3,2,4)])
# A percentage cannot be negative, so clamp the lower bound at zero. pmax()
# leaves NA bounds as NA instead of failing on an NA logical subscript.
methods_data2$lower_conf <- pmax(methods_data2$lower_conf, 0)

# Long format (one row per individual replicate measurement) so that all
# replicate points are drawn by a single layer.
methods_replicate_points <- methods_data2 %>%
  pivot_longer(cols = all_of(replicate_cols), names_to = "replicate", values_to = "pct_recombined")

Recombination_method_plot <- ggplot() +
  theme_classic() + theme(axis.text.x = element_text(angle = -90, vjust = 0.5)) +
  scale_y_continuous(expand = c(0,0), limits = c(0,65)) +
  ylab("% mCherry positive cells") +
  xlab(NULL) +
  geom_hline(yintercept = 0) +
  geom_errorbar(data = methods_data2, aes(x = concat, ymin = lower_conf, ymax = upper_conf, color = WellSize),
                width = 0.2, position = position_dodge(width = 0.3), size = 0.3) +
  # shape 95 is a short horizontal dash marking the mean
  geom_point(data = methods_data2, aes(x = concat, y = mean, color = WellSize),
             position = position_dodge(width = 0.3), shape = 95, size = 2) +
  # Individual replicates. The original used four geom_jitter() layers with an
  # explicit dodge position, which draws plain dodged (un-jittered) points.
  geom_point(data = methods_replicate_points, aes(x = concat, y = pct_recombined, color = WellSize),
             size = 0.9, position = position_dodge(width = 0.3), alpha = 0.4)
print(Recombination_method_plot)
```
## GT and GA Orthogonality
## Relevant to Figure 1
This next chunk is for testing the orthogonality of the GT and GA Bxb1 sequences
```{r Initial testing of orthogonality of GT and GA recombination site pairs, fig.height = 2, fig.width = 3}
# Orthogonality of the GT and GA recombination site pairs.
# Each CSV is one replicate experiment. Every frame is sorted the same way and
# the "none" Bxb1 rows are dropped, so the replicate frames can be combined
# row by row. This relies on all three files containing the same set of
# conditions -- nothing below re-checks the alignment.
rep_1_frame <- read.csv(file = "data/Orthog_recomb/GT_vs_GA_200206.csv", header = TRUE, stringsAsFactors = FALSE) %>%
  arrange(attp, bxb1, mix) %>% filter(bxb1 != "none")
rep_2_frame <- read.csv(file = "data/Orthog_recomb/GT_vs_GA_200213.csv", header = TRUE, stringsAsFactors = FALSE) %>%
  arrange(attp, bxb1, mix) %>% filter(bxb1 != "none")
rep_3_frame <- read.csv(file = "data/Orthog_recomb/GT_vs_GA_200219.csv", header = TRUE, stringsAsFactors = FALSE) %>%
  arrange(attp, bxb1, mix) %>% filter(bxb1 != "none")

# Columns 5:8 hold the measurements (they include `gfp` and `mcherry`, which
# are selected by name further down); statistics are computed on log10 values.
rep_1 <- log10(rep_1_frame[,5:8])
rep_2 <- log10(rep_2_frame[,5:8])
rep_3 <- log10(rep_3_frame[,5:8])

# Stack the replicates along a third dimension so that element-wise statistics
# can be taken across them.
#https://stackoverflow.com/questions/32609926/performing-element-wise-standard-deviation-in-r-with-two-matrices
m <- abind(rep_1, rep_2, rep_3, along=3)
# Number of replicates, taken from the stacked array. This was hard-coded to 2
# although three replicates are combined, which inflated the standard error
# (division by sqrt(2) instead of sqrt(3)).
replicates <- dim(m)[3]

means <- (rep_1 + rep_2 + rep_3) / replicates
standard_devs <- data.frame(apply(m, 1:2, sd))
standard_errors <- standard_devs / sqrt(replicates)
# Normal-approximation 95% confidence interval on the log10 scale.
upper_conf <- means + standard_errors * 1.96
lower_conf <- means - standard_errors * 1.96

# Condition labels come from the first replicate (columns 1:4).
label_frame <- rep_1_frame[,1:4] %>% mutate(comboname = paste(bxb1, mix, sep = "\n"))
means2 <- cbind(label_frame,means)
upper_conf2 <- cbind(label_frame,upper_conf)
lower_conf2 <- cbind(label_frame,lower_conf)
#ga_data_filtered$attp <- factor(ga_data_filtered$attp, levels = c("gt","ga"))

# Long format: one row per condition and fluorescent channel.
upper_conf2_melted <- melt(upper_conf2[,c("comboname","attp","gfp","mcherry")], id.vars = c("comboname","attp"))
lower_conf2_melted <- melt(lower_conf2[,c("comboname","attp","gfp","mcherry")], id.vars = c("comboname","attp"))
means2_melted <- melt(means2[,c("comboname","attp","gfp","mcherry")], id.vars = c("comboname","attp"))
# Back-transform the mean of the log10 values, i.e. the geometric mean.
means2_melted$geomean <- 10^means2_melted$value
means2_melted$geomean_rounded <- round(means2_melted$geomean,1)

# Both error-bar limits in a single frame, back-transformed to the linear
# scale. The three melted frames are built from the same labels in the same
# row order, so their rows correspond one to one. The original plot pulled
# `ymin` out of a second data frame with `$` inside aes().
conf2_melted <- means2_melted[, c("comboname", "attp", "variable")]
conf2_melted$lower <- 10^lower_conf2_melted$value
conf2_melted$upper <- 10^upper_conf2_melted$value

green_red_colorscale <- c(gfp = "green", mcherry = "red")
GT_vs_GA_plot <- ggplot() +
  theme_classic() + theme(axis.text.x = element_text(angle = -90, vjust = 0.5)) +
  ylab("% cells recombined") +
  xlab(NULL) +
  scale_y_log10() +
  scale_color_manual(values = green_red_colorscale) +
  geom_errorbar(data = conf2_melted, aes(x = comboname, color = variable, ymin = lower, ymax = upper),
                alpha = 0.5, width = 0.2, position = position_dodge(width = 0.5)) +
  geom_point(data = means2_melted, aes(x = comboname, y = geomean, color = variable),
             alpha = 0.5, position = position_dodge(width = 0.5)) +
  facet_wrap(~attp)
print(GT_vs_GA_plot)
# NOTE(review): this writes to "Plots/" while the flanking chunk writes to
# "plots/"; on a case-sensitive file system these are different directories.
ggsave(file = "Plots/GT_vs_GA_plot.pdf", GT_vs_GA_plot, height = 1.8, width = 3.8)
```
## Flanking recombinase sites to excise unwanted bacterial DNA sequences
## Relevant to Figure 2
```{r Flanking data - Flow Cyometry}
## Flanking experiments: for each flow run, estimate the fraction of
## red-positive "Flanked" (G718A) cells whose BL1.A / YL2.A ratio lies above
## the 5th percentile of the matched "Control" (G747A) sample, and collect the
## per-run fractions in flanking_flow_df.
##
## Every run used to be a copy-pasted stanza; the shared logic now lives in the
## helpers below. All object names created by the original stanzas
## (fXX_none / fXX_g718a / fXX_g747a, fXX, fXX_subset,
## fXX_control_95pct_interval, fXX_fraction_unexcised) are still created.

## Read one exported event table and tag every event with the flow run ID and
## the sample label ("None", "Flanked" or "Control").
read_flank_sample <- function(path, flow_id, sample_label) {
  events <- read.csv(file = path)
  events[["flow"]] <- rep(flow_id, nrow(events))
  events[["sample"]] <- rep(sample_label, nrow(events))
  events
}

## Stack the first `cell_num` events of every sample and add the BL1.A / YL2.A
## ratio. head() is used instead of `[1:cell_num, ]` because the latter pads
## with all-NA rows whenever a sample has fewer than `cell_num` events; the
## original stanzas stripped those rows for only some of the runs.
pool_flank_events <- function(samples, cell_num) {
  events <- do.call(rbind, lapply(samples, head, n = cell_num))
  events$ratio <- events$BL1.A / events$YL2.A
  events
}

## 5th and 97.5th percentile of the ratio among the Control events.
## NOTE(review): the result is stored in `*_95pct_interval` objects, but the
## lower bound is the 5% (not 2.5%) quantile, exactly as in the original code --
## confirm this is the intended one-sided threshold.
flank_control_interval <- function(red_pos_events) {
  control_ratio <- red_pos_events$ratio[which(red_pos_events$sample == "Control")]
  quantile(control_ratio, c(0.05, 0.975))
}

## Fraction of Flanked events whose ratio exceeds the lower control bound.
flank_unexcised_fraction <- function(red_pos_events, control_interval) {
  flanked_ratio <- red_pos_events$ratio[which(red_pos_events$sample == "Flanked")]
  sum(flanked_ratio > control_interval[1]) / length(flanked_ratio)
}

## Optional diagnostics (these replace the commented-out ggplot calls that
## followed every stanza). They are not called during the analysis; run them
## interactively, e.g.
##   plot_flank_scatter(f84, 5e3, point_alpha = 0.01)
##   plot_flank_ratio_hist(f282_subset, f282_control_95pct_interval, facet_scales = "free_y")
## The original F84 scatter used alpha = 0.01 (all others 0.1), and the
## histograms from F282 onwards used scales = "free_y".
plot_flank_scatter <- function(events, red_pos_cutoff, point_alpha = 0.1) {
  ggplot() + theme_bw() +
    scale_x_log10(limits = c(1e1,1e5), expand = c(0,0)) +
    scale_y_log10(limits = c(1e1,1e5), expand = c(0,0)) +
    geom_vline(xintercept = red_pos_cutoff, linetype = 2) +
    geom_point(data = events, aes(x = YL2.A, y = BL1.A), alpha = point_alpha) +
    facet_grid(rows = vars(sample))
}
plot_flank_ratio_hist <- function(red_pos_events, control_interval, facet_scales = "fixed") {
  ggplot() + theme_bw() +
    scale_x_continuous(limits = c(-0.05, 0.5)) +
    geom_histogram(data = red_pos_events, aes(x = ratio), binwidth = 0.01) +
    facet_grid(rows = vars(sample), scales = facet_scales) +
    geom_vline(xintercept = control_interval, linetype = 2)
}

## Making a data frame to keep track of how many unexcised cells are observed with G718A
## The first row is an all-NA placeholder, as in the original script.
flanking_flow_df <- data.frame("flow" = NA, "frac_unexcised" = NA)

## For every run below:
##   flank_expt_cell_num  = maximum number of events taken from each sample
##   flank_red_pos_cutoff = minimum YL2.A for an event to count as red-positive

## F84
f84_none <- read_flank_sample("data/flow/Flanking/F84/F84_None_unselected_Sample(1).csv.gz", "F84", "None")
f84_g718a <- read_flank_sample("data/flow/Flanking/F84/F84_G718A_hygro_Sample(4).csv.gz", "F84", "Flanked")
f84_g747a <- read_flank_sample("data/flow/Flanking/F84/F84_G747A_hygro_Sample(5).csv.gz", "F84", "Control")
flank_expt_cell_num <- 90000
flank_red_pos_cutoff <- 5e3
f84 <- pool_flank_events(list(f84_none, f84_g718a, f84_g747a), flank_expt_cell_num)
f84_subset <- f84 %>% filter(YL2.A >= flank_red_pos_cutoff)
f84_control_95pct_interval <- flank_control_interval(f84_subset)
f84_fraction_unexcised <- flank_unexcised_fraction(f84_subset, f84_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F84", "frac_unexcised" = f84_fraction_unexcised))

## F114
f114_g718a <- read_flank_sample("data/flow/Flanking/F114/F114_G718A_unselected_A2.csv.gz", "F114", "Flanked")
f114_g747a <- read_flank_sample("data/flow/Flanking/F114/F114_G747A_unselected_A3.csv.gz", "F114", "Control")
flank_expt_cell_num <- 45000
flank_red_pos_cutoff <- 3e3
f114 <- pool_flank_events(list(f114_g718a, f114_g747a), flank_expt_cell_num)
f114_subset <- f114 %>% filter(YL2.A >= flank_red_pos_cutoff)
f114_control_95pct_interval <- flank_control_interval(f114_subset)
f114_fraction_unexcised <- flank_unexcised_fraction(f114_subset, f114_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F114", "frac_unexcised" = f114_fraction_unexcised))

## F116
f116_g718a <- read_flank_sample("data/flow/Flanking/F116/F116_G718A_unselected_H11.csv.gz", "F116", "Flanked")
f116_g747a <- read_flank_sample("data/flow/Flanking/F116/F116_G747A_unselected_H12.csv.gz", "F116", "Control")
flank_expt_cell_num <- 25000
flank_red_pos_cutoff <- 5e3
f116 <- pool_flank_events(list(f116_g718a, f116_g747a), flank_expt_cell_num)
f116_subset <- f116 %>% filter(YL2.A >= flank_red_pos_cutoff)
f116_control_95pct_interval <- flank_control_interval(f116_subset)
f116_fraction_unexcised <- flank_unexcised_fraction(f116_subset, f116_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F116", "frac_unexcised" = f116_fraction_unexcised))

## F119
f119_none <- read_flank_sample("data/flow/Flanking/F119/F119_none_unselected_G11.csv.gz", "F119", "None")
f119_g718a <- read_flank_sample("data/flow/Flanking/F119/F119_G718A_unselected_H11.csv.gz", "F119", "Flanked")
f119_g747a <- read_flank_sample("data/flow/Flanking/F119/F119_G747A_unselected_H12.csv.gz", "F119", "Control")
flank_expt_cell_num <- 9000
flank_red_pos_cutoff <- 5e3
f119 <- pool_flank_events(list(f119_none, f119_g718a, f119_g747a), flank_expt_cell_num)
f119_subset <- f119 %>% filter(YL2.A >= flank_red_pos_cutoff)
f119_control_95pct_interval <- flank_control_interval(f119_subset)
f119_fraction_unexcised <- flank_unexcised_fraction(f119_subset, f119_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F119", "frac_unexcised" = f119_fraction_unexcised))

## F130
f130_none <- read_flank_sample("data/flow/Flanking/F130/F130_none_unselected_D10.csv.gz", "F130", "None")
f130_g718a <- read_flank_sample("data/flow/Flanking/F130/F130_G718A_unselected_D11.csv.gz", "F130", "Flanked")
f130_g747a <- read_flank_sample("data/flow/Flanking/F130/F130_G747A_unselected_D12.csv.gz", "F130", "Control")
flank_expt_cell_num <- 89000
flank_red_pos_cutoff <- 5e3
f130 <- pool_flank_events(list(f130_none, f130_g718a, f130_g747a), flank_expt_cell_num)
f130_subset <- f130 %>% filter(YL2.A >= flank_red_pos_cutoff)
f130_control_95pct_interval <- flank_control_interval(f130_subset)
f130_fraction_unexcised <- flank_unexcised_fraction(f130_subset, f130_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F130", "frac_unexcised" = f130_fraction_unexcised))

## F131
f131_none <- read_flank_sample("data/flow/Flanking/F131/F131_none_unselected_D4.csv.gz", "F131", "None")
f131_g718a <- read_flank_sample("data/flow/Flanking/F131/F131_G718A_unselected_D5.csv.gz", "F131", "Flanked")
f131_g747a <- read_flank_sample("data/flow/Flanking/F131/F131_G747A_unselected_D6.csv.gz", "F131", "Control")
flank_expt_cell_num <- 190000
flank_red_pos_cutoff <- 3e3
f131 <- pool_flank_events(list(f131_none, f131_g718a, f131_g747a), flank_expt_cell_num)
f131_subset <- f131 %>% filter(YL2.A >= flank_red_pos_cutoff)
f131_control_95pct_interval <- flank_control_interval(f131_subset)
f131_fraction_unexcised <- flank_unexcised_fraction(f131_subset, f131_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F131", "frac_unexcised" = f131_fraction_unexcised))

## F132
f132_g718a <- read_flank_sample("data/flow/Flanking/F132/F132_G718A_unselected_G12.csv.gz", "F132", "Flanked")
f132_g747a <- read_flank_sample("data/flow/Flanking/F132/F132_G747A_unselected_H1.csv.gz", "F132", "Control")
flank_expt_cell_num <- 180000
flank_red_pos_cutoff <- 3e3
f132 <- pool_flank_events(list(f132_g718a, f132_g747a), flank_expt_cell_num)
f132_subset <- f132 %>% filter(YL2.A >= flank_red_pos_cutoff)
f132_control_95pct_interval <- flank_control_interval(f132_subset)
f132_fraction_unexcised <- flank_unexcised_fraction(f132_subset, f132_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F132", "frac_unexcised" = f132_fraction_unexcised))

## f280
f280_g718a <- read_flank_sample("data/flow/Flanking/F280/F280_G718A_unselected.csv.gz", "F280", "Flanked")
f280_g747a <- read_flank_sample("data/flow/Flanking/F280/F280_G747A_unselected.csv.gz", "F280", "Control")
flank_expt_cell_num <- 180000
flank_red_pos_cutoff <- 3e3
f280 <- pool_flank_events(list(f280_g718a, f280_g747a), flank_expt_cell_num)
f280_subset <- f280 %>% filter(YL2.A >= flank_red_pos_cutoff)
f280_control_95pct_interval <- flank_control_interval(f280_subset)
f280_fraction_unexcised <- flank_unexcised_fraction(f280_subset, f280_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F280", "frac_unexcised" = f280_fraction_unexcised))

## f281
f281_g718a <- read_flank_sample("data/flow/Flanking/F281/F281_G718A_unselected_A1.csv.gz", "F281", "Flanked")
f281_g747a <- read_flank_sample("data/flow/Flanking/F281/F281_G747A_unselected_A2.csv.gz", "F281", "Control")
flank_expt_cell_num <- 310000
flank_red_pos_cutoff <- 3e3
f281 <- pool_flank_events(list(f281_g718a, f281_g747a), flank_expt_cell_num)
f281_subset <- f281 %>% filter(YL2.A >= flank_red_pos_cutoff)
f281_control_95pct_interval <- flank_control_interval(f281_subset)
f281_fraction_unexcised <- flank_unexcised_fraction(f281_subset, f281_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F281", "frac_unexcised" = f281_fraction_unexcised))

## f281b (second Flanked sample; compared against the F281 Control sample)
f281b_g718a <- read_flank_sample("data/flow/Flanking/F281/F281b_G718A_unselected_A3.csv.gz", "F281b", "Flanked")
flank_expt_cell_num <- 230000
flank_red_pos_cutoff <- 3e3
f281b <- pool_flank_events(list(f281b_g718a, f281_g747a), flank_expt_cell_num)
f281b_subset <- f281b %>% filter(YL2.A >= flank_red_pos_cutoff)
f281b_control_95pct_interval <- flank_control_interval(f281b_subset)
f281b_fraction_unexcised <- flank_unexcised_fraction(f281b_subset, f281b_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F281b", "frac_unexcised" = f281b_fraction_unexcised))

## f282
f282_g718a <- read_flank_sample("data/flow/Flanking/F282/F282_G718A_unselected_A1.csv.gz", "F282", "Flanked")
f282_g747a <- read_flank_sample("data/flow/Flanking/F282/F282_G747A_unselected_A2.csv.gz", "F282", "Control")
flank_expt_cell_num <- 280000
flank_red_pos_cutoff <- 3e3
f282 <- pool_flank_events(list(f282_g718a, f282_g747a), flank_expt_cell_num)
f282_subset <- f282 %>% filter(YL2.A >= flank_red_pos_cutoff)
f282_control_95pct_interval <- flank_control_interval(f282_subset)
f282_fraction_unexcised <- flank_unexcised_fraction(f282_subset, f282_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F282", "frac_unexcised" = f282_fraction_unexcised))

## f282b (second Flanked sample; compared against the F282 Control sample)
f282b_g718a <- read_flank_sample("data/flow/Flanking/F282/F282b_G718A_unselected_A3.csv.gz", "F282b", "Flanked")
## NOTE(review): 930000 is far larger than the caps used for the other runs --
## confirm it is not a typo.
flank_expt_cell_num <- 930000
flank_red_pos_cutoff <- 3e3
f282b <- pool_flank_events(list(f282b_g718a, f282_g747a), flank_expt_cell_num)
f282b_subset <- f282b %>% filter(YL2.A >= flank_red_pos_cutoff)
f282b_control_95pct_interval <- flank_control_interval(f282b_subset)
f282b_fraction_unexcised <- flank_unexcised_fraction(f282b_subset, f282b_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F282b", "frac_unexcised" = f282b_fraction_unexcised))

## f283
f283_g718a <- read_flank_sample("data/flow/Flanking/F283/F283_G718A_unselected_A1.csv.gz", "F283", "Flanked")
f283_g747a <- read_flank_sample("data/flow/Flanking/F283/F283_G747A_unselected_A2.csv.gz", "F283", "Control")
flank_expt_cell_num <- 305000
flank_red_pos_cutoff <- 3e3
f283 <- pool_flank_events(list(f283_g718a, f283_g747a), flank_expt_cell_num)
f283_subset <- f283 %>% filter(YL2.A >= flank_red_pos_cutoff)
f283_control_95pct_interval <- flank_control_interval(f283_subset)
f283_fraction_unexcised <- flank_unexcised_fraction(f283_subset, f283_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F283", "frac_unexcised" = f283_fraction_unexcised))

## f283b (second Flanked sample; compared against the F283 Control sample)
f283b_g718a <- read_flank_sample("data/flow/Flanking/F283/F283b_G718A_unselected_A3.csv.gz", "F283b", "Flanked")
flank_expt_cell_num <- 80000
flank_red_pos_cutoff <- 3e3
f283b <- pool_flank_events(list(f283b_g718a, f283_g747a), flank_expt_cell_num)
f283b_subset <- f283b %>% filter(YL2.A >= flank_red_pos_cutoff)
f283b_control_95pct_interval <- flank_control_interval(f283b_subset)
f283b_fraction_unexcised <- flank_unexcised_fraction(f283b_subset, f283b_control_95pct_interval)
flanking_flow_df <- rbind(flanking_flow_df, data.frame("flow" = "F283b", "frac_unexcised" = f283b_fraction_unexcised))
```
```{r Some more flanking data - Flow Cyometry}
## Some summary graphs
## An example set of samples: the first 20000 rows of each F131 sample, dropping rows with no sample label
f131_example <- rbind(
  f131_none[seq_len(20000), ],
  f131_g718a[seq_len(20000), ],
  f131_g747a[seq_len(20000), ]
) %>%
  filter(!is.na(sample))
## Fix the facet order
f131_example$sample <- factor(f131_example$sample, levels = c("None", "Flanked", "Control"))
## BL1.A against YL2.A on log10 axes, one panel per sample
F131_example_scatterplot <- ggplot() +
  geom_point(data = f131_example, aes(x = YL2.A, y = BL1.A), alpha = 0.1, size = 0.5) +
  facet_grid(cols = vars(sample)) +
  scale_x_log10(limits = c(1e1,1e5), expand = c(0,0), breaks = c(1e2,1e4)) +
  scale_y_log10(limits = c(1e1,1e4), expand = c(0,0), breaks = c(1e2,1e3)) +
  theme(panel.grid.minor = element_blank())
ggsave(
  filename = "plots/Flanking_example_scatterplot.pdf",
  plot = F131_example_scatterplot,
  height = 1.6,
  width = 4.5
)
F131_example_scatterplot
## To respond to reviewer comments, compare the fraction of red cells in the control and flanked transfections.
## YL2.A distribution per sample on a log10 x axis
ggplot() +
  geom_histogram(data = f131_example, aes(x = YL2.A)) +
  facet_grid(rows = vars(sample)) +
  scale_x_log10()
## Percentage of all rows in each sample with YL2.A above 1000
paste(
  "F131 flanked recomb percentage:",
  round(sum(f131_g718a$YL2.A > 1000, na.rm = TRUE) / nrow(f131_g718a) * 100, 1)
)
paste(
  "F131 control recomb percentage:",
  round(sum(f131_g747a$YL2.A > 1000, na.rm = TRUE) / nrow(f131_g747a) * 100, 1)
)
## Rebuild the example set from the first 194000 rows of each F131 sample, dropping rows with no sample label
f131_example <- rbind(
  f131_none[seq_len(194000), ],
  f131_g718a[seq_len(194000), ],
  f131_g747a[seq_len(194000), ]
) %>%
  filter(!is.na(sample))
f131_example$sample <- factor(f131_example$sample, levels = c("None", "Flanked", "Control"))
## Green / red ratio with 250 added to both channels (presumably a pseudocount to stabilise low-signal events - confirm)
f131_example$ratio <- (f131_example$BL1.A + 250) / (f131_example$YL2.A + 250)
## Keep events at or above the red cutoff (flank_red_pos_cutoff must already be set by an earlier section) and drop the "None" sample
f131_example_subset <- f131_example %>% filter(YL2.A >= flank_red_pos_cutoff & sample != "None")
## Despite the name, this holds only the 5th percentile of the control ratio; it is drawn as the dashed line
f131_example_control_95pct_interval <- c(quantile((f131_example_subset %>% filter(sample == "Control"))$ratio, 0.05))
F131_ratio_histogram <- ggplot() + theme_bw() +
  theme(panel.grid.minor = element_blank(), panel.grid.major.y = element_blank()) +
  labs(x = "Green / Red ratio", y = "Number\nof cells") +
  scale_x_continuous(limits = c(0, 0.6), breaks = c(0, 0.2, 0.4, 0.6), expand = c(0,0)) +
  scale_y_continuous(breaks = c(0,100)) +
  geom_histogram(data = f131_example_subset, aes(x = ratio), binwidth = 0.01) +
  facet_grid(rows = vars(sample)) +
  geom_vline(xintercept = f131_example_control_95pct_interval, linetype = 2)
## Write to the lowercase "plots/" directory like the other ggsave() calls; "Plots/" fails on case-sensitive filesystems
ggsave(
  filename = "plots/F131_ratio_histogram.pdf",
  plot = F131_ratio_histogram,
  height = 1.4,
  width = 1.8
)
F131_ratio_histogram
## To respond to reviewer comments, report the percentage of red-positive cells (YL2.A > 1000) in each
## sample whose green / red ratio is above 0.15 (counted here as unexcised)
paste(
  "F131 flanked and unexcised:",
  round(
    with(
      subset(f131_example_subset, sample == "Flanked" & YL2.A > 1000),
      sum(ratio > 0.15, na.rm = TRUE) / length(ratio)
    ) * 100,
    1
  )
)
paste(
  "F131 control and unexcised:",
  round(
    with(
      subset(f131_example_subset, sample == "Control" & YL2.A > 1000),
      sum(ratio > 0.15, na.rm = TRUE) / length(ratio)
    ) * 100,
    1
  )
)
```
```{r Flanking data averages}
## Looking at fraction excised over time
## Timepoint metadata, joined to the per-experiment fractions on the "flow" identifier
flanking_flow_days <- read.csv(file = "data/Flanking_expt_timepoints.csv", header = TRUE, stringsAsFactors = FALSE)
flanking_flow_df2 <- merge(flanking_flow_df, flanking_flow_days, by = "flow")
## replicate is used as a grouping variable further down, so store it as a factor
flanking_flow_df2$replicate <- as.factor(flanking_flow_df2$replicate)
## Only rows recorded on the "Attune" instrument are plotted. Every point is drawn red: the constant
## colour always overrode the former color = replicate mapping, so that mapping has been removed.
Flanking_timeplot <- ggplot() +
  theme(panel.grid.minor = element_blank(), legend.position = "none") +
  scale_y_log10(limits = c(0.001,1)) + scale_x_log10() +
  labs(x = "Days after\ntransfection", y = "Fraction of\ncells unexcised") +
  geom_point(
    data = flanking_flow_df2 %>% filter(instrument == "Attune"),
    aes(x = days, y = frac_unexcised),
    color = "red"
  )
ggsave(filename = "plots/Flanking_timeplot.pdf", plot = Flanking_timeplot, height = 1.6, width = 2)
Flanking_timeplot
## Looking at the reproducibility of recombination
## Geometric mean of the non-zero unexcised fractions within each replicate
flank_indep_recombs <- flanking_flow_df2 %>%
  filter(frac_unexcised != 0) %>%
  group_by(replicate) %>%
  summarize(frac_unexcised = 10^mean(log10(frac_unexcised)))
## Geometric mean across replicates, with bounds at +/- 1.96 standard errors on the log10 scale
flank_flow_geomean <- with(flank_indep_recombs, 10^mean(log10(frac_unexcised)))
flank_flow_upper_conf <- with(
  flank_indep_recombs,
  10^(mean(log10(frac_unexcised)) + (sd(log10(frac_unexcised)) / sqrt(length(frac_unexcised)) * 1.96))
)
flank_flow_lower_conf <- with(
  flank_indep_recombs,
  10^(mean(log10(frac_unexcised)) - (sd(log10(frac_unexcised)) / sqrt(length(frac_unexcised)) * 1.96))
)
## One red point per replicate at x = 0, a black dash at the overall geometric mean, and the error bar
Repeated_excisions_plot <- ggplot() +
  geom_point(data = flank_indep_recombs, aes(x = 0, y = frac_unexcised), alpha = 0.5, color = "Red") +
  geom_point(aes(x = 0, y = flank_flow_geomean), shape = 95, size = 10, color = "Black") +
  geom_errorbar(aes(x = 0, ymax = flank_flow_upper_conf, ymin = flank_flow_lower_conf), width = 0.01, alpha = 0.4) +
  scale_y_log10(limits = c(0.001,1)) + #scale_x_continuous(limits = c(-0.1,0.1)) +
  labs(x = NULL, y = "Fraction of cells unexcised") +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
ggsave(
  filename = "plots/Repeated_excisions_plot.pdf",
  plot = Repeated_excisions_plot,
  height = 1.1,
  width = 1
)
Repeated_excisions_plot
```
```{r Effect of adding AP1903 to flanking recombinations}
## f280
## Flanked (G718A) and control (G747A) samples, each untreated and AP1903-treated
f280_g718a <- read.csv(file = "data/flow/Flanking/F280/F280_G718A_unselected.csv.gz") %>%
  mutate(flow = "F280", sample = "Flanked", treatment = "none")
f280_g747a <- read.csv(file = "data/flow/Flanking/F280/F280_G747A_unselected.csv.gz") %>%
  mutate(flow = "F280", sample = "Control", treatment = "none")
f280_g718a_ap1903 <- read.csv(file = "data/flow/Flanking/F280/F280_G718A_AP1903.csv.gz") %>%
  mutate(flow = "F280", sample = "Flanked", treatment = "AP1903")
f280_g747a_ap1903 <- read.csv(file = "data/flow/Flanking/F280/F280_G747A_AP1903.csv.gz") %>%
  mutate(flow = "F280", sample = "Control", treatment = "AP1903")
flank_expt_cell_num <- 374000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f280 <- rbind(
  head(f280_g718a, flank_expt_cell_num),
  head(f280_g747a, flank_expt_cell_num),
  head(f280_g718a_ap1903, flank_expt_cell_num),
  head(f280_g747a_ap1903, flank_expt_cell_num)
)
#ggplot() + theme_bw() + scale_x_log10(limits = c(1e1,1e5), expand = c(0,0)) + scale_y_log10(limits = c(1e1,1e5), expand = c(0,0)) + geom_vline(xintercept = flank_red_pos_cutoff, linetype = 2) + geom_point(data = f280, aes(x = YL2.A, y = BL1.A), alpha = 0.1) + facet_grid(rows = vars(sample))
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f280$ratio <- f280$BL1.A / f280$YL2.A
f280_subset <- f280 %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f280_control_95pct_interval <- quantile(filter(f280_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f280_subset$label <- paste0(f280_subset$sample, "_", f280_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f280_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f280_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f280_subset_summary1 <- f280_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f280_control_95pct_interval[1]))
f280_subset_summary2 <- f280_subset %>% group_by(label) %>% count()
f280_subset_summary <- merge(f280_subset_summary1, f280_subset_summary2, by = "label")
f280_subset_summary$frac_unexcised <- f280_subset_summary$unexcised / f280_subset_summary$n
f280_subset_summary$rep <- "F280"
## F281
## Flanked (G718A) and control (G747A) samples, each untreated and AP1903-treated
f281_g718a <- read.csv(file = "data/flow/Flanking/F281/F281_G718A_unselected_A1.csv.gz") %>%
  mutate(flow = "F281", sample = "Flanked", treatment = "none")
f281_g747a <- read.csv(file = "data/flow/Flanking/F281/F281_G747A_unselected_A2.csv.gz") %>%
  mutate(flow = "F281", sample = "Control", treatment = "none")
f281_g718a_ap1903 <- read.csv(file = "data/flow/Flanking/F281/F281_G718A_AP1903_B1.csv.gz") %>%
  mutate(flow = "F281", sample = "Flanked", treatment = "AP1903")
f281_g747a_ap1903 <- read.csv(file = "data/flow/Flanking/F281/F281_G747A_AP1903_B2.csv.gz") %>%
  mutate(flow = "F281", sample = "Control", treatment = "AP1903")
flank_expt_cell_num <- 287000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f281 <- rbind(
  head(f281_g718a, flank_expt_cell_num),
  head(f281_g747a, flank_expt_cell_num),
  head(f281_g718a_ap1903, flank_expt_cell_num),
  head(f281_g747a_ap1903, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f281$ratio <- f281$BL1.A / f281$YL2.A
f281_subset <- f281 %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f281_control_95pct_interval <- quantile(filter(f281_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f281_subset$label <- paste0(f281_subset$sample, "_", f281_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f281_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f281_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f281_subset_summary1 <- f281_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f281_control_95pct_interval[1]))
f281_subset_summary2 <- f281_subset %>% group_by(label) %>% count()
f281_subset_summary <- merge(f281_subset_summary1, f281_subset_summary2, by = "label")
f281_subset_summary$frac_unexcised <- f281_subset_summary$unexcised / f281_subset_summary$n
f281_subset_summary$rep <- "F281"
## F281b
## Flanked (G718A) and control (G747A) samples, each untreated and AP1903-treated
f281b_g718a <- read.csv(file = "data/flow/Flanking/F281/F281b_G718A_unselected_A3.csv.gz") %>%
  mutate(flow = "F281b", sample = "Flanked", treatment = "none")
f281b_g747a <- read.csv(file = "data/flow/Flanking/F281/F281b_G747A_unselected_A4.csv.gz") %>%
  mutate(flow = "F281b", sample = "Control", treatment = "none")
f281b_g718a_ap1903 <- read.csv(file = "data/flow/Flanking/F281/F281b_G718A_AP1903_B3.csv.gz") %>%
  mutate(flow = "F281b", sample = "Flanked", treatment = "AP1903")
f281b_g747a_ap1903 <- read.csv(file = "data/flow/Flanking/F281/F281b_G747A_AP1903_B4.csv.gz") %>%
  mutate(flow = "F281b", sample = "Control", treatment = "AP1903")
flank_expt_cell_num <- 157000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f281b <- rbind(
  head(f281b_g718a, flank_expt_cell_num),
  head(f281b_g747a, flank_expt_cell_num),
  head(f281b_g718a_ap1903, flank_expt_cell_num),
  head(f281b_g747a_ap1903, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f281b$ratio <- f281b$BL1.A / f281b$YL2.A
f281b_subset <- f281b %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f281b_control_95pct_interval <- quantile(filter(f281b_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f281b_subset$label <- paste0(f281b_subset$sample, "_", f281b_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f281b_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f281b_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f281b_subset_summary1 <- f281b_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f281b_control_95pct_interval[1]))
f281b_subset_summary2 <- f281b_subset %>% group_by(label) %>% count()
f281b_subset_summary <- merge(f281b_subset_summary1, f281b_subset_summary2, by = "label")
f281b_subset_summary$frac_unexcised <- f281b_subset_summary$unexcised / f281b_subset_summary$n
f281b_subset_summary$rep <- "F281b"
## F282
## Flanked (G718A) and control (G747A) samples, each untreated and AP1903-treated
f282_g718a <- read.csv(file = "data/flow/Flanking/F282/F282_G718A_unselected_A1.csv.gz") %>%
  mutate(flow = "F282", sample = "Flanked", treatment = "none")
f282_g747a <- read.csv(file = "data/flow/Flanking/F282/F282_G747A_unselected_A2.csv.gz") %>%
  mutate(flow = "F282", sample = "Control", treatment = "none")
f282_g718a_ap1903 <- read.csv(file = "data/flow/Flanking/F282/F282_G718A_AP1903_B1.csv.gz") %>%
  mutate(flow = "F282", sample = "Flanked", treatment = "AP1903")
f282_g747a_ap1903 <- read.csv(file = "data/flow/Flanking/F282/F282_G747A_AP1903_B2.csv.gz") %>%
  mutate(flow = "F282", sample = "Control", treatment = "AP1903")
flank_expt_cell_num <- 190000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f282 <- rbind(
  head(f282_g718a, flank_expt_cell_num),
  head(f282_g747a, flank_expt_cell_num),
  head(f282_g718a_ap1903, flank_expt_cell_num),
  head(f282_g747a_ap1903, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f282$ratio <- f282$BL1.A / f282$YL2.A
f282_subset <- f282 %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f282_control_95pct_interval <- quantile(filter(f282_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f282_subset$label <- paste0(f282_subset$sample, "_", f282_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f282_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f282_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f282_subset_summary1 <- f282_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f282_control_95pct_interval[1]))
f282_subset_summary2 <- f282_subset %>% group_by(label) %>% count()
f282_subset_summary <- merge(f282_subset_summary1, f282_subset_summary2, by = "label")
f282_subset_summary$frac_unexcised <- f282_subset_summary$unexcised / f282_subset_summary$n
f282_subset_summary$rep <- "F282"
## F282b
## Flanked (G718A) and control (G747A) samples, each untreated and AP1903-treated
f282b_g718a <- read.csv(file = "data/flow/Flanking/F282/F282b_G718A_unselected_A3.csv.gz") %>%
  mutate(flow = "F282b", sample = "Flanked", treatment = "none")
f282b_g747a <- read.csv(file = "data/flow/Flanking/F282/F282b_G747A_unselected_A4.csv.gz") %>%
  mutate(flow = "F282b", sample = "Control", treatment = "none")
f282b_g718a_ap1903 <- read.csv(file = "data/flow/Flanking/F282/F282b_G718A_AP1903_B3.csv.gz") %>%
  mutate(flow = "F282b", sample = "Flanked", treatment = "AP1903")
f282b_g747a_ap1903 <- read.csv(file = "data/flow/Flanking/F282/F282b_G747A_AP1903_B4.csv.gz") %>%
  mutate(flow = "F282b", sample = "Control", treatment = "AP1903")
flank_expt_cell_num <- 93000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f282b <- rbind(
  head(f282b_g718a, flank_expt_cell_num),
  head(f282b_g747a, flank_expt_cell_num),
  head(f282b_g718a_ap1903, flank_expt_cell_num),
  head(f282b_g747a_ap1903, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f282b$ratio <- f282b$BL1.A / f282b$YL2.A
f282b_subset <- f282b %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f282b_control_95pct_interval <- quantile(filter(f282b_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f282b_subset$label <- paste0(f282b_subset$sample, "_", f282b_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f282b_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f282b_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f282b_subset_summary1 <- f282b_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f282b_control_95pct_interval[1]))
f282b_subset_summary2 <- f282b_subset %>% group_by(label) %>% count()
f282b_subset_summary <- merge(f282b_subset_summary1, f282b_subset_summary2, by = "label")
f282b_subset_summary$frac_unexcised <- f282b_subset_summary$unexcised / f282b_subset_summary$n
f282b_subset_summary$rep <- "F282b"
## F283
## Flanked (G718A) and control (G747A) samples: untreated, AP1903-treated and Hygro-treated
f283_g718a <- read.csv(file = "data/flow/Flanking/F283/F283_G718A_unselected_A1.csv.gz") %>%
  mutate(flow = "F283", sample = "Flanked", treatment = "none")
f283_g747a <- read.csv(file = "data/flow/Flanking/F283/F283_G747A_unselected_A2.csv.gz") %>%
  mutate(flow = "F283", sample = "Control", treatment = "none")
f283_g718a_ap1903 <- read.csv(file = "data/flow/Flanking/F283/F283_G718A_AP1903_B1.csv.gz") %>%
  mutate(flow = "F283", sample = "Flanked", treatment = "AP1903")
f283_g747a_ap1903 <- read.csv(file = "data/flow/Flanking/F283/F283_G747A_AP1903_B2.csv.gz") %>%
  mutate(flow = "F283", sample = "Control", treatment = "AP1903")
## The Hygro files belong to F283 (see their file names), so they are tagged "F283" rather than "F283b"
f283_g718a_hygro <- read.csv(file = "data/flow/Flanking/F283/F283_G718A_Hygro_C1.csv.gz") %>%
  mutate(flow = "F283", sample = "Flanked", treatment = "Hygro")
f283_g747a_hygro <- read.csv(file = "data/flow/Flanking/F283/F283_G747A_Hygro_C2.csv.gz") %>%
  mutate(flow = "F283", sample = "Control", treatment = "Hygro")
flank_expt_cell_num <- 12000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f283 <- rbind(
  head(f283_g718a, flank_expt_cell_num),
  head(f283_g747a, flank_expt_cell_num),
  head(f283_g718a_ap1903, flank_expt_cell_num),
  head(f283_g747a_ap1903, flank_expt_cell_num),
  head(f283_g718a_hygro, flank_expt_cell_num),
  head(f283_g747a_hygro, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f283$ratio <- f283$BL1.A / f283$YL2.A
f283_subset <- f283 %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f283_control_95pct_interval <- quantile(filter(f283_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f283_subset$label <- paste0(f283_subset$sample, "_", f283_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f283_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f283_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f283_subset_summary1 <- f283_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f283_control_95pct_interval[1]))
f283_subset_summary2 <- f283_subset %>% group_by(label) %>% count()
f283_subset_summary <- merge(f283_subset_summary1, f283_subset_summary2, by = "label")
f283_subset_summary$frac_unexcised <- f283_subset_summary$unexcised / f283_subset_summary$n
f283_subset_summary$rep <- "F283"
## F283b
## Flanked (G718A) and control (G747A) samples, each untreated and AP1903-treated
f283b_g718a <- read.csv(file = "data/flow/Flanking/F283/F283b_G718A_unselected_A3.csv.gz") %>%
  mutate(flow = "F283b", sample = "Flanked", treatment = "none")
f283b_g747a <- read.csv(file = "data/flow/Flanking/F283/F283b_G747A_unselected_A4.csv.gz") %>%
  mutate(flow = "F283b", sample = "Control", treatment = "none")
f283b_g718a_ap1903 <- read.csv(file = "data/flow/Flanking/F283/F283b_G718A_AP1903_B3.csv.gz") %>%
  mutate(flow = "F283b", sample = "Flanked", treatment = "AP1903")
f283b_g747a_ap1903 <- read.csv(file = "data/flow/Flanking/F283/F283b_G747A_AP1903_B4.csv.gz") %>%
  mutate(flow = "F283b", sample = "Control", treatment = "AP1903")
flank_expt_cell_num <- 83000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f283b <- rbind(
  head(f283b_g718a, flank_expt_cell_num),
  head(f283b_g747a, flank_expt_cell_num),
  head(f283b_g718a_ap1903, flank_expt_cell_num),
  head(f283b_g747a_ap1903, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f283b$ratio <- f283b$BL1.A / f283b$YL2.A
f283b_subset <- f283b %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f283b_control_95pct_interval <- quantile(filter(f283b_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f283b_subset$label <- paste0(f283b_subset$sample, "_", f283b_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f283b_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f283b_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f283b_subset_summary1 <- f283b_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f283b_control_95pct_interval[1]))
f283b_subset_summary2 <- f283b_subset %>% group_by(label) %>% count()
f283b_subset_summary <- merge(f283b_subset_summary1, f283b_subset_summary2, by = "label")
f283b_subset_summary$frac_unexcised <- f283b_subset_summary$unexcised / f283b_subset_summary$n
f283b_subset_summary$rep <- "F283b"
## F284
## Flanked (G718A) and control (G747A) samples, each with AP1903 + Hygro and with Hygro alone
f284_g718a_ap1903_hygro <- read.csv(file = "data/flow/Flanking/F284/F284_G718A_AP1903_Hygro_A1.csv.gz") %>%
  mutate(flow = "F284", sample = "Flanked", treatment = "AP1903_Hygro")
f284_g747a_ap1903_hygro <- read.csv(file = "data/flow/Flanking/F284/F284_G747A_AP1903_Hygro_A2.csv.gz") %>%
  mutate(flow = "F284", sample = "Control", treatment = "AP1903_Hygro")
f284_g718a_hygro <- read.csv(file = "data/flow/Flanking/F284/F284_G718A_Hygro_B1.csv.gz") %>%
  mutate(flow = "F284", sample = "Flanked", treatment = "Hygro")
f284_g747a_hygro <- read.csv(file = "data/flow/Flanking/F284/F284_G747A_Hygro_B2.csv.gz") %>%
  mutate(flow = "F284", sample = "Control", treatment = "Hygro")
flank_expt_cell_num <- 51000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f284 <- rbind(
  head(f284_g718a_ap1903_hygro, flank_expt_cell_num),
  head(f284_g747a_ap1903_hygro, flank_expt_cell_num),
  head(f284_g718a_hygro, flank_expt_cell_num),
  head(f284_g747a_hygro, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f284$ratio <- f284$BL1.A / f284$YL2.A
f284_subset <- f284 %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f284_control_95pct_interval <- quantile(filter(f284_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f284_subset$label <- paste0(f284_subset$sample, "_", f284_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f284_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f284_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f284_subset_summary1 <- f284_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f284_control_95pct_interval[1]))
f284_subset_summary2 <- f284_subset %>% group_by(label) %>% count()
f284_subset_summary <- merge(f284_subset_summary1, f284_subset_summary2, by = "label")
f284_subset_summary$frac_unexcised <- f284_subset_summary$unexcised / f284_subset_summary$n
f284_subset_summary$rep <- "F284"
## F284b
## Flanked (G718A) and control (G747A) samples, each with Hygro alone and with AP1903 + Hygro
f284b_g718a_hygro <- read.csv(file = "data/flow/Flanking/F284/F284b_G718A_Hygro_B3.csv.gz") %>%
  mutate(flow = "F284b", sample = "Flanked", treatment = "Hygro")
f284b_g747a_hygro <- read.csv(file = "data/flow/Flanking/F284/F284b_G747A_Hygro_B4.csv.gz") %>%
  mutate(flow = "F284b", sample = "Control", treatment = "Hygro")
f284b_g718a_ap1903_hygro <- read.csv(file = "data/flow/Flanking/F284/F284b_G718A_AP1903_Hygro_A3.csv.gz") %>%
  mutate(flow = "F284b", sample = "Flanked", treatment = "AP1903_Hygro")
f284b_g747a_ap1903_hygro <- read.csv(file = "data/flow/Flanking/F284/F284b_G747A_AP1903_Hygro_A4.csv.gz") %>%
  mutate(flow = "F284b", sample = "Control", treatment = "AP1903_Hygro")
flank_expt_cell_num <- 35000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f284b <- rbind(
  head(f284b_g718a_hygro, flank_expt_cell_num),
  head(f284b_g747a_hygro, flank_expt_cell_num),
  head(f284b_g718a_ap1903_hygro, flank_expt_cell_num),
  head(f284b_g747a_ap1903_hygro, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f284b$ratio <- f284b$BL1.A / f284b$YL2.A
f284b_subset <- f284b %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f284b_control_95pct_interval <- quantile(filter(f284b_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f284b_subset$label <- paste0(f284b_subset$sample, "_", f284b_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f284b_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f284b_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f284b_subset_summary1 <- f284b_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f284b_control_95pct_interval[1]))
f284b_subset_summary2 <- f284b_subset %>% group_by(label) %>% count()
f284b_subset_summary <- merge(f284b_subset_summary1, f284b_subset_summary2, by = "label")
f284b_subset_summary$frac_unexcised <- f284b_subset_summary$unexcised / f284b_subset_summary$n
f284b_subset_summary$rep <- "F284b"
## F285
## Hygro-treated flanked (G718A) and control (G747A) samples
f285_g718a_hygro <- read.csv(file = "data/flow/Flanking/F285/F285_G718A_Hygro_A3.csv.gz") %>%
  mutate(flow = "F285", sample = "Flanked", treatment = "Hygro")
f285_g747a_hygro <- read.csv(file = "data/flow/Flanking/F285/F285_G747A_Hygro_A4.csv.gz") %>%
  mutate(flow = "F285", sample = "Control", treatment = "Hygro")
flank_expt_cell_num <- 235000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f285 <- rbind(
  head(f285_g718a_hygro, flank_expt_cell_num),
  head(f285_g747a_hygro, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f285$ratio <- f285$BL1.A / f285$YL2.A
f285_subset <- f285 %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f285_control_95pct_interval <- quantile(filter(f285_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f285_subset$label <- paste0(f285_subset$sample, "_", f285_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f285_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f285_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f285_subset_summary1 <- f285_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f285_control_95pct_interval[1]))
f285_subset_summary2 <- f285_subset %>% group_by(label) %>% count()
f285_subset_summary <- merge(f285_subset_summary1, f285_subset_summary2, by = "label")
f285_subset_summary$frac_unexcised <- f285_subset_summary$unexcised / f285_subset_summary$n
f285_subset_summary$rep <- "F285"
## F286
## Hygro-treated flanked (G718A) and control (G747A) samples
f286_g718a_hygro <- read.csv(file = "data/flow/Flanking/F286/F286_G718A_Hygro_B1.csv.gz") %>%
  mutate(flow = "F286", sample = "Flanked", treatment = "Hygro")
f286_g747a_hygro <- read.csv(file = "data/flow/Flanking/F286/F286_G747A_Hygro_B2.csv.gz") %>%
  mutate(flow = "F286", sample = "Control", treatment = "Hygro")
flank_expt_cell_num <- 88000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f286 <- rbind(
  head(f286_g718a_hygro, flank_expt_cell_num),
  head(f286_g747a_hygro, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f286$ratio <- f286$BL1.A / f286$YL2.A
f286_subset <- f286 %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f286_control_95pct_interval <- quantile(filter(f286_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f286_subset$label <- paste0(f286_subset$sample, "_", f286_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f286_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f286_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f286_subset_summary1 <- f286_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f286_control_95pct_interval[1]))
f286_subset_summary2 <- f286_subset %>% group_by(label) %>% count()
f286_subset_summary <- merge(f286_subset_summary1, f286_subset_summary2, by = "label")
f286_subset_summary$frac_unexcised <- f286_subset_summary$unexcised / f286_subset_summary$n
f286_subset_summary$rep <- "F286"
## F286b
## Flanked (G718A) and control (G747A) samples, each with Hygro alone and with AP1903 + Hygro
f286b_g718a_hygro <- read.csv(file = "data/flow/Flanking/F286/F286b_G718A_Hygro_B3.csv.gz") %>%
  mutate(flow = "F286b", sample = "Flanked", treatment = "Hygro")
f286b_g747a_hygro <- read.csv(file = "data/flow/Flanking/F286/F286b_G747A_Hygro_B4.csv.gz") %>%
  mutate(flow = "F286b", sample = "Control", treatment = "Hygro")
f286b_g718a_ap1903_hygro <- read.csv(file = "data/flow/Flanking/F286/F286b_G718A_AP1903_Hygro_A1.csv.gz") %>%
  mutate(flow = "F286b", sample = "Flanked", treatment = "AP1903_Hygro")
f286b_g747a_ap1903_hygro <- read.csv(file = "data/flow/Flanking/F286/F286b_G747A_AP1903_Hygro_A2.csv.gz") %>%
  mutate(flow = "F286b", sample = "Control", treatment = "AP1903_Hygro")
flank_expt_cell_num <- 51000
flank_red_pos_cutoff <- 3e3
## Take at most flank_expt_cell_num rows per sample; head() does not pad short files with NA rows
f286b <- rbind(
  head(f286b_g718a_hygro, flank_expt_cell_num),
  head(f286b_g747a_hygro, flank_expt_cell_num),
  head(f286b_g718a_ap1903_hygro, flank_expt_cell_num),
  head(f286b_g747a_ap1903_hygro, flank_expt_cell_num)
)
## Per-event BL1.A / YL2.A ratio, then keep events at or above the YL2.A cutoff
f286b$ratio <- f286b$BL1.A / f286b$YL2.A
f286b_subset <- f286b %>% filter(YL2.A >= flank_red_pos_cutoff)
## 5th and 97.5th percentiles of the ratio across all "Control" rows
f286b_control_95pct_interval <- quantile(filter(f286b_subset, sample == "Control")$ratio, probs = c(0.05, 0.975))
f286b_subset$label <- paste0(f286b_subset$sample, "_", f286b_subset$treatment)
#ggplot() + theme_bw() + scale_x_continuous(limits = c(-0.05, 0.5)) + geom_histogram(data = f286b_subset, aes(x = ratio), binwidth = 0.01) + facet_wrap(~label, scales = "free_y") + geom_vline(xintercept = f286b_control_95pct_interval, linetype = 2)
## Count rows above the lower control bound. The [1] is required: comparing against the whole
## length-2 interval recycles it and alternates the two thresholds from row to row.
f286b_subset_summary1 <- f286b_subset %>%
  group_by(label) %>%
  summarize(unexcised = sum(ratio > f286b_control_95pct_interval[1]))
f286b_subset_summary2 <- f286b_subset %>% group_by(label) %>% count()
f286b_subset_summary <- merge(f286b_subset_summary1, f286b_subset_summary2, by = "label")
f286b_subset_summary$frac_unexcised <- f286b_subset_summary$unexcised / f286b_subset_summary$n
f286b_subset_summary$rep <- "F286b"
## Pool the per-replicate summary tables into one long table.
flank_ap1903_summary <- rbind(
  f280_subset_summary,
  f281_subset_summary, f281b_subset_summary,
  f282_subset_summary, f282b_subset_summary,
  f283_subset_summary, f283b_subset_summary,
  f284_subset_summary, f284b_subset_summary,
  f285_subset_summary,
  f286_subset_summary, f286b_subset_summary
)
## Geometric mean of the unexcised fraction across replicates, per label.
flank_ap1903_summary2 <- flank_ap1903_summary %>%
  group_by(label) %>%
  summarize(geomean = 10^mean(log10(frac_unexcised)))
## Fix the x-axis order: Control conditions first, then Flanked.
flank_label_order <- c(
  "Control_none", "Control_AP1903", "Control_Hygro", "Control_AP1903_Hygro",
  "Flanked_none", "Flanked_AP1903", "Flanked_Hygro", "Flanked_AP1903_Hygro"
)
flank_ap1903_summary$label <- factor(flank_ap1903_summary$label, levels = flank_label_order)
flank_ap1903_summary2$label <- factor(flank_ap1903_summary2$label, levels = flank_label_order)
## One red point per replicate, with a horizontal dash (shape 95) at the geometric mean.
Flanking_AP1903_plot <- ggplot() +
  theme(
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  scale_y_log10(breaks = c(0.01, 0.1, 1)) +
  labs(x = NULL, y = "Fraction of cells\nunexcised") +
  geom_beeswarm(
    data = flank_ap1903_summary,
    aes(x = label, y = frac_unexcised),
    color = "red", alpha = 0.5, size = 1
  ) +
  geom_point(
    data = flank_ap1903_summary2,
    aes(x = label, y = geomean),
    shape = 95, size = 8, color = "Black", alpha = 0.4
  )
ggsave(filename = "plots/Flanking_AP1903_plot.pdf", plot = Flanking_AP1903_plot, width = 2.25, height = 2.25)
Flanking_AP1903_plot
```
```{r}
## To respond to reviewer comments, compare the fraction of red cells in the control and flanked transfections.
ggplot() + scale_x_log10() +
  geom_histogram(data = f131_example, aes(x = YL2.A)) + facet_grid(rows = vars(sample))

## Percent of the events in `flow_df` whose YL2.A signal is above `red_cutoff`.
## Events with a missing YL2.A count towards the denominator only.
pct_red_positive <- function(flow_df, red_cutoff = 1000) {
  sum(flow_df[["YL2.A"]] > red_cutoff, na.rm = TRUE) / nrow(flow_df) * 100
}

## Flanked (G718A) and control (G747A) tables for each replicate, in matching order.
flanked_unselected <- list(
  F130 = f130_g718a, F131 = f131_g718a, F132 = f132_g718a,
  F280 = f280_g718a, F282 = f282_g718a, F283 = f283_g718a
)
control_unselected <- list(
  F130 = f130_g747a, F131 = f131_g747a, F132 = f132_g747a,
  F280 = f280_g747a, F282 = f282_g747a, F283 = f283_g747a
)

## Build the table with numeric columns directly. Binding c("F130", <num>, <num>)
## rows into a matrix turns every number into text, so converting back with
## as.numeric() loses precision (and yields factor codes when strings are read
## as factors).
flanked_unselected_recomb_rates <- data.frame(
  sample = names(flanked_unselected),
  flanked_rate = unname(vapply(flanked_unselected, pct_red_positive, numeric(1))),
  control_rate = unname(vapply(control_unselected, pct_red_positive, numeric(1))),
  stringsAsFactors = FALSE
)
flanked_unselected_recomb_rates$fold_diff <- flanked_unselected_recomb_rates$control_rate / flanked_unselected_recomb_rates$flanked_rate
## Mean percent red-positive over all flanked and control samples together.
mean(c(flanked_unselected_recomb_rates$flanked_rate, flanked_unselected_recomb_rates$control_rate))
##
## Among Control events with YL2.A above `red_cutoff`, the fraction whose ratio
## is below the `flanked_quantile` quantile of the Flanked events' ratio.
## `subset_df` must carry the columns `sample`, `YL2.A` and `ratio`.
frac_control_below_flanked <- function(subset_df, red_cutoff = 1000, flanked_quantile = 0.95) {
  flanked_ratio <- subset_df$ratio[which(subset_df$sample == "Flanked")]
  ratio_threshold <- quantile(flanked_ratio, flanked_quantile)
  control_red <- subset_df[which(subset_df$sample == "Control" & subset_df$YL2.A > red_cutoff), , drop = FALSE]
  sum(control_red$ratio < ratio_threshold, na.rm = TRUE) / nrow(control_red)
}

control_red_not_green_inputs <- list(
  F130 = f130_subset, F131 = f131_subset, F132 = f132_subset,
  F280 = f280_subset, F282 = f282_subset, F283 = f283_subset
)
## Numeric column built directly; a character matrix round-trip through
## as.numeric() would lose precision.
control_red_not_green <- data.frame(
  sample = names(control_red_not_green_inputs),
  control_redpos_grnneg = unname(vapply(control_red_not_green_inputs, frac_control_below_flanked, numeric(1))),
  stringsAsFactors = FALSE
)
## Geometric mean of the fraction across replicates.
10^mean(log10(control_red_not_green$control_redpos_grnneg))
```
```{r Flanking sequencing data}
## Per-sample sequencing results; `fraction` is plotted and `reads` is summed below.
## TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
flanked_seq <- read.csv(file = "data/Flanked_sequencing.csv", header = TRUE, stringsAsFactors = FALSE)
## Geometric mean of the read fraction for every sample/result combination.
flanked_seq_geomean <- flanked_seq %>%
  group_by(sample, result) %>%
  summarize(geomean_fraction = 10^(mean(log10(fraction))))
## Individual values as faint points, geometric means as horizontal dashes (shape 95).
Flanking_sequencing <- ggplot() +
  theme(panel.grid.major.x = element_blank(), legend.position = "top") +
  labs(x = NULL, y = "Fraction of reads") +
  geom_point(
    data = flanked_seq,
    aes(x = sample, y = fraction, color = result),
    position = position_dodge(width = 0.5), alpha = 0.4
  ) +
  geom_point(
    data = flanked_seq_geomean,
    aes(x = sample, y = geomean_fraction, color = result),
    position = position_dodge(width = 0.5), shape = 95, size = 10
  )
## `filename` is written in full rather than relying on partial matching of `file`.
ggsave(filename = "plots/Flanking_sequencing.pdf", plot = Flanking_sequencing, height = 2, width = 2.2)
Flanking_sequencing
## Reviewer 2 asked how many reads underlie this figure. The table below answers that question.
flanked_read_counts <- flanked_seq %>%
  group_by(replicate, plasmid) %>%
  summarize(reads_sum = sum(reads))
print(flanked_read_counts$reads_sum)
```
## Creating and initially testing the double landing pad
## This is relevant to Figure 3
```{r Initial validation data for G542Ac3 and G783Ac2 cells}
## Clone 2, run 201002_F69: read the "none", "recomb" and "ap1903" samples.
## Each table is reduced to six cytometer columns, renamed to the short names
## given in `clone_channel_map`, and tagged with clone and treatment.
clone_channel_map <- c(fsc = "FSC.A", ssc = "SSC.A", blu = "VL1.A", grn = "BL1.A", red = "YL2.A", nir = "RL1.A")
c2_orig_none_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone2_none.csv.gz")
c2_orig_none <- setNames(c2_orig_none_raw[, unname(clone_channel_map)], names(clone_channel_map))
c2_orig_none$clone <- "c2"
c2_orig_none$treatment <- "none"
c2_orig_recomb_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone2_recomb.csv.gz")
c2_orig_recomb <- setNames(c2_orig_recomb_raw[, unname(clone_channel_map)], names(clone_channel_map))
c2_orig_recomb$clone <- "c2"
c2_orig_recomb$treatment <- "recomb"
c2_orig_ap1903_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone2_ap1903.csv.gz")
c2_orig_ap1903 <- setNames(c2_orig_ap1903_raw[, unname(clone_channel_map)], names(clone_channel_map))
c2_orig_ap1903$clone <- "c2"
c2_orig_ap1903$treatment <- "ap1903"
## Plot at most `cell_number` events per sample. head() stops at the end of a
## short table, whereas `df[1:cell_number, ]` would pad it with all-NA rows.
cell_number <- 2000
combined_data <- rbind(
  head(c2_orig_none, cell_number),
  head(c2_orig_recomb, cell_number),
  head(c2_orig_ap1903, cell_number)
)
combined_data$treatment <- factor(combined_data$treatment, levels = c("recomb", "none", "ap1903"))
plot_alpha <- 0.2
axis_limits <- c(10, 1e6)
custom_color_scale <- c("none" = "magenta", "recomb" = "black", "ap1903" = "cyan")
## Theme, colour scale and log axes shared by all six pairwise scatter plots.
c2_orig_base_plot <- ggplot() + theme_bw() +
  theme(panel.grid.minor = element_blank(), legend.position = "none") +
  scale_color_manual(values = custom_color_scale) +
  scale_x_log10(limits = axis_limits) + scale_y_log10(limits = axis_limits)
c2_orig_bn_plot <- c2_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = nir, color = treatment), alpha = plot_alpha)
c2_orig_bg_plot <- c2_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = grn, color = treatment), alpha = plot_alpha)
c2_orig_br_plot <- c2_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = red, color = treatment), alpha = plot_alpha)
c2_orig_ng_plot <- c2_orig_base_plot +
  geom_point(data = combined_data, aes(x = nir, y = grn, color = treatment), alpha = plot_alpha)
c2_orig_nr_plot <- c2_orig_base_plot +
  geom_point(data = combined_data, aes(x = nir, y = red, color = treatment), alpha = plot_alpha)
## The dashed rectangle marks the grn >= 3e4 & red >= 3e4 gate used for the percentages below.
c2_orig_gr_plot <- c2_orig_base_plot +
  geom_point(data = combined_data, aes(x = grn, y = red, color = treatment), alpha = plot_alpha) +
  geom_rect(mapping = aes(xmin = 3e4, xmax = 1e6, ymin = 3e4, ymax = 1e6), alpha = 0, color = "black", linetype = 2)
c2_orig_gr_plot
c2_orig_arranged_plot <- grid.arrange(c2_orig_bn_plot, c2_orig_bg_plot, c2_orig_br_plot, c2_orig_ng_plot, c2_orig_nr_plot, c2_orig_gr_plot, ncol = 6, nrow = 1)
## `filename` is written in full rather than relying on partial matching of `file`.
ggsave(filename = "plots/201002/c2_orig_arranged_plot.png", plot = c2_orig_arranged_plot, height = 3, width = 18)
## Percent of ALL events (not just the plotted subsample) inside the gate.
paste("Percent double positive before selection:", round(sum(c2_orig_recomb$grn >= 3e4 & c2_orig_recomb$red >= 3e4) / nrow(c2_orig_recomb) * 100, 2))
paste("Percent double positive after selection:", round(sum(c2_orig_ap1903$grn >= 3e4 & c2_orig_ap1903$red >= 3e4) / nrow(c2_orig_ap1903) * 100, 2))
```
```{r Initial validation data for G542Ac3 and G783Ac6 cells}
## Clone 6, run 201002_F69: read the "none", "recomb" and "ap1903" samples.
## Each table is reduced to six cytometer columns, renamed to the short names
## given in `clone_channel_map`, and tagged with clone and treatment.
clone_channel_map <- c(fsc = "FSC.A", ssc = "SSC.A", blu = "VL1.A", grn = "BL1.A", red = "YL2.A", nir = "RL1.A")
c6_orig_none_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone6_none.csv.gz")
c6_orig_none <- setNames(c6_orig_none_raw[, unname(clone_channel_map)], names(clone_channel_map))
c6_orig_none$clone <- "c6"
c6_orig_none$treatment <- "none"
c6_orig_recomb_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone6_recomb.csv.gz")
c6_orig_recomb <- setNames(c6_orig_recomb_raw[, unname(clone_channel_map)], names(clone_channel_map))
c6_orig_recomb$clone <- "c6"
c6_orig_recomb$treatment <- "recomb"
c6_orig_ap1903_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone6_ap1903.csv.gz")
c6_orig_ap1903 <- setNames(c6_orig_ap1903_raw[, unname(clone_channel_map)], names(clone_channel_map))
c6_orig_ap1903$clone <- "c6"
c6_orig_ap1903$treatment <- "ap1903"
## Plot at most `cell_number` events per sample. head() stops at the end of a
## short table, whereas `df[1:cell_number, ]` would pad it with all-NA rows.
cell_number <- 2000
combined_data <- rbind(
  head(c6_orig_none, cell_number),
  head(c6_orig_recomb, cell_number),
  head(c6_orig_ap1903, cell_number)
)
combined_data$treatment <- factor(combined_data$treatment, levels = c("recomb", "none", "ap1903"))
plot_alpha <- 0.2
axis_limits <- c(10, 1e6)
custom_color_scale <- c("none" = "magenta", "recomb" = "black", "ap1903" = "cyan")
## Theme, colour scale and log axes shared by all six pairwise scatter plots.
c6_orig_base_plot <- ggplot() + theme_bw() +
  theme(panel.grid.minor = element_blank(), legend.position = "none") +
  scale_color_manual(values = custom_color_scale) +
  scale_x_log10(limits = axis_limits) + scale_y_log10(limits = axis_limits)
c6_orig_bn_plot <- c6_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = nir, color = treatment), alpha = plot_alpha)
c6_orig_bg_plot <- c6_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = grn, color = treatment), alpha = plot_alpha)
c6_orig_br_plot <- c6_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = red, color = treatment), alpha = plot_alpha)
c6_orig_ng_plot <- c6_orig_base_plot +
  geom_point(data = combined_data, aes(x = nir, y = grn, color = treatment), alpha = plot_alpha)
c6_orig_nr_plot <- c6_orig_base_plot +
  geom_point(data = combined_data, aes(x = nir, y = red, color = treatment), alpha = plot_alpha)
## The dashed rectangle marks the grn >= 3e4 & red >= 3e4 gate used for the percentages below.
c6_orig_gr_plot <- c6_orig_base_plot +
  geom_point(data = combined_data, aes(x = grn, y = red, color = treatment), alpha = plot_alpha) +
  geom_rect(mapping = aes(xmin = 3e4, xmax = 1e6, ymin = 3e4, ymax = 1e6), alpha = 0, color = "black", linetype = 2)
c6_orig_gr_plot
c6_orig_arranged_plot <- grid.arrange(c6_orig_bn_plot, c6_orig_bg_plot, c6_orig_br_plot, c6_orig_ng_plot, c6_orig_nr_plot, c6_orig_gr_plot, ncol = 6, nrow = 1)
## `filename` is written in full rather than relying on partial matching of `file`.
ggsave(filename = "plots/201002/c6_orig_arranged_plot.png", plot = c6_orig_arranged_plot, height = 3, width = 18)
## Percent of ALL events (not just the plotted subsample) inside the gate.
paste("Percent double positive before selection:", round(sum(c6_orig_recomb$grn >= 3e4 & c6_orig_recomb$red >= 3e4) / nrow(c6_orig_recomb) * 100, 2))
paste("Percent double positive after selection:", round(sum(c6_orig_ap1903$grn >= 3e4 & c6_orig_ap1903$red >= 3e4) / nrow(c6_orig_ap1903) * 100, 2))
```
```{r Initial validation data for G542Ac3 and G783Ac11 cells}
## Clone 11, run 201002_F69: read the "none", "recomb" and "ap1903" samples.
## Each table is reduced to six cytometer columns, renamed to the short names
## given in `clone_channel_map`, and tagged with clone and treatment.
clone_channel_map <- c(fsc = "FSC.A", ssc = "SSC.A", blu = "VL1.A", grn = "BL1.A", red = "YL2.A", nir = "RL1.A")
c11_orig_none_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone11_none.csv.gz")
c11_orig_none <- setNames(c11_orig_none_raw[, unname(clone_channel_map)], names(clone_channel_map))
c11_orig_none$clone <- "c11"
c11_orig_none$treatment <- "none"
c11_orig_recomb_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone11_recomb.csv.gz")
c11_orig_recomb <- setNames(c11_orig_recomb_raw[, unname(clone_channel_map)], names(clone_channel_map))
c11_orig_recomb$clone <- "c11"
c11_orig_recomb$treatment <- "recomb"
c11_orig_ap1903_raw <- read.csv(file = "data/flow/201002_F69_G783A_Clone_Comparisons/Clone11_ap1903.csv.gz")
c11_orig_ap1903 <- setNames(c11_orig_ap1903_raw[, unname(clone_channel_map)], names(clone_channel_map))
c11_orig_ap1903$clone <- "c11"
c11_orig_ap1903$treatment <- "ap1903"
## Plot at most `cell_number` events per sample. head() stops at the end of a
## short table, whereas `df[1:cell_number, ]` would pad it with all-NA rows.
cell_number <- 2000
combined_data <- rbind(
  head(c11_orig_none, cell_number),
  head(c11_orig_recomb, cell_number),
  head(c11_orig_ap1903, cell_number)
)
combined_data$treatment <- factor(combined_data$treatment, levels = c("recomb", "none", "ap1903"))
plot_alpha <- 0.2
axis_limits <- c(10, 1e6)
custom_color_scale <- c("none" = "magenta", "recomb" = "black", "ap1903" = "cyan")
## Theme, colour scale and log axes shared by all six pairwise scatter plots.
c11_orig_base_plot <- ggplot() + theme_bw() +
  theme(panel.grid.minor = element_blank(), legend.position = "none") +
  scale_color_manual(values = custom_color_scale) +
  scale_x_log10(limits = axis_limits) + scale_y_log10(limits = axis_limits)
c11_orig_bn_plot <- c11_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = nir, color = treatment), alpha = plot_alpha)
c11_orig_bg_plot <- c11_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = grn, color = treatment), alpha = plot_alpha)
c11_orig_br_plot <- c11_orig_base_plot +
  geom_point(data = combined_data, aes(x = blu, y = red, color = treatment), alpha = plot_alpha)
c11_orig_ng_plot <- c11_orig_base_plot +
  geom_point(data = combined_data, aes(x = nir, y = grn, color = treatment), alpha = plot_alpha)
c11_orig_nr_plot <- c11_orig_base_plot +
  geom_point(data = combined_data, aes(x = nir, y = red, color = treatment), alpha = plot_alpha)
## The dashed rectangle marks the grn >= 3e4 & red >= 3e4 gate used for the percentages below.
c11_orig_gr_plot <- c11_orig_base_plot +
  geom_point(data = combined_data, aes(x = grn, y = red, color = treatment), alpha = plot_alpha) +
  geom_rect(mapping = aes(xmin = 3e4, xmax = 1e6, ymin = 3e4, ymax = 1e6), alpha = 0, color = "black", linetype = 2)
c11_orig_gr_plot
c11_orig_arranged_plot <- grid.arrange(c11_orig_bn_plot, c11_orig_bg_plot, c11_orig_br_plot, c11_orig_ng_plot, c11_orig_nr_plot, c11_orig_gr_plot, ncol = 6, nrow = 1)
## `filename` is written in full rather than relying on partial matching of `file`.
ggsave(filename = "plots/201002/c11_orig_arranged_plot.png", plot = c11_orig_arranged_plot, height = 3, width = 18)
## Percent of ALL events (not just the plotted subsample) inside the gate.
paste("Percent double positive before selection:", round(sum(c11_orig_recomb$grn >= 3e4 & c11_orig_recomb$red >= 3e4) / nrow(c11_orig_recomb) * 100, 2))
paste("Percent double positive after selection:", round(sum(c11_orig_ap1903$grn >= 3e4 & c11_orig_ap1903$red >= 3e4) / nrow(c11_orig_ap1903) * 100, 2))
```
Below is repeat data from re-testing these three clones, to confirm which one is best.
```{r Initial validation data for G542Ac3 and G783Ac2 cells repeated}
c2_none_raw <- read.csv(file = "data/flow/201016_F74_G783A_Clone_Comparisons/Clone2_none.csv.gz")
c2_none <- c2_none_raw[,c("FSC.A","SSC.A","VL1.A","BL1.A","YL2.A","RL1.A")]; colnames(c2_none) <- c("fsc","ssc","blu","grn","red","nir")
c2_none$clone <- "c2"; c2_none$treatment <- "none"
c2_recomb_raw <- read.csv(file = "data/flow/201016_F74_G783A_Clone_Comparisons/Clone2_recomb.csv.gz")
c2_recomb <- c2_recomb_raw[,c("FSC.A","SSC.A","VL1.A","BL1.A","YL2.A","RL1.A")]; colnames(c2_recomb) <- c("fsc","ssc","blu","grn","red","nir")
c2_recomb$clone <- "c2"; c2_recomb$treatment <- "recomb"
c2_ap1903_raw <- read.csv(file = "data/flow/201016_F74_G783A_Clone_Comparisons/Clone2_ap1903.csv.gz")
c2_ap1903 <- c2_ap1903_raw[,c("FSC.A","SSC.A","VL1.A","BL1.A","YL2.A","RL1.A")]; colnames(c2_ap1903) <- c("fsc","ssc","blu","grn","red","nir")
c2_ap1903$clone <- "c2"; c2_ap1903$treatment <- "ap1903"
cell_number <- 2000
combined_data <- rbind(c2_none[1:cell_number,], c2_recomb[1:cell_number,], c2_ap1903[1:cell_number,])
combined_data$treatment <- factor(combined_data$treatment, levels = c("recomb","none","ap1903"))
plot_alpha <- 0.2
axis_limits <- c(10,1e6)
custom_color_scale <- c("none" = "magenta", "recomb" = "black", "ap1903" = "cyan")
c2_bn_plot <- ggplot() + theme_bw() + theme(panel.grid.minor = element_blank(), legend.position = "none") +
scale_color_manual(values = custom_color_scale) +