/
plot_roc.r
executable file
·151 lines (129 loc) · 5.87 KB
/
plot_roc.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env Rscript
# Plot ROC
require(plyr)
require(ggplot2)
require(pracma)
require(grid)
argv <- commandArgs(trailingOnly = TRUE)
prefix <- gsub("\\s","", argv[1])
print(prefix)
truthset <- argv[2]
print(truthset)
results <- argv[3]
print(results)
xmin <- as.numeric(argv[4])
xmax <- as.numeric(argv[5])
ymin <- as.numeric(argv[6])
ymax <- as.numeric(argv[7])
roc <- read.delim(results)
bests <- ddply(roc, .(set), function(x) { data.frame(best_snps=with(x, min(false_negative_snps + false_positive_snps)), best_snp_threshold=min(subset(x, (false_negative_snps + false_positive_snps) == with(x, min(false_negative_snps + false_positive_snps)))$threshold ), best_indels=with(x, min(false_negative_indels + false_positive_indels)), best_indel_threshold=min(subset(x, (false_negative_indels + false_positive_indels) == with(x, min(false_negative_indels + false_positive_indels)))$threshold )) })
write.table(bests, paste(prefix, ".bests.tsv", sep=""), row.names=FALSE, quote=FALSE, sep="\t")
#abs(trapz(c(1, roc$complexfpr), c(1, roc$complextpr)))
true_snps <- with(subset(roc, set==truthset), max(num_snps))
true_indels <- with(subset(roc, set==truthset), max(num_indels))
# get ROC AUC
auc <- ddply(roc, .(set),
function(x) {
data.frame(
snp_auc=ifelse(true_snps>0,
with(x,
abs(trapz(c(1,
false_positive_snps/(false_positive_snps+ max(false_negative_snps + num_snps - false_positive_snps))),
c(max(1- false_negative_snps/true_snps),
1- false_negative_snps/true_snps)))),
0),
indel_auc=ifelse(true_indels>0,
with(x,
abs(trapz(c(1,
false_positive_indels/(false_positive_indels+ max(false_negative_indels + num_indels - false_positive_indels))),
c(max(1- false_negative_indels/true_indels),
1- false_negative_indels/true_indels)))),
0)
)
}
)
write.table(auc, paste(prefix, ".auc.tsv", sep=""), row.names=FALSE, quote=FALSE, sep="\t")
rocsnps <- ddply(roc, .(set),
function(x) {
data.frame(
FPR=
with(x,
c(1,
false_positive_snps/(false_positive_snps+ max(false_negative_snps + num_snps - false_positive_snps)))),
TPR=
with(x,
c(max(1- false_negative_snps/true_snps),
1- false_negative_snps/true_snps)),
type=as.factor("snps")
)
}
)
rocindels <- ddply(roc, .(set),
function(x) {
data.frame(
FPR=
with(x,
c(1,
false_positive_indels/(false_positive_indels+ max(false_negative_indels + num_indels - false_positive_indels)))),
TPR=
with(x,
c(max(1- false_negative_indels/true_indels),
1- false_negative_indels/true_indels)),
type=as.factor("indels")
)
}
)
if (FALSE) {
if (true_snps>0) {
ggplot(subset(roc, set != truthset),
aes(false_positive_snps/(false_positive_snps+with(subset(roc, set==set), max(false_negative_snps + num_snps - false_positive_snps))),
1- false_negative_snps/with(subset(roc, set==set), max(false_negative_snps + num_snps - false_positive_snps)),
group=set,
color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw()
+ coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
ggsave(paste(prefix, ".snps.png", sep=""), height=6, width=9)
}
if (true_indels>0) {
ggplot(subset(roc, set != truthset),
aes(false_positive_indels/(false_positive_indels+with(subset(roc, set==set), max(false_negative_indels + num_indels - false_positive_indels))),
1- false_negative_indels/with(subset(roc, set==set), max(false_negative_indels + num_indels - false_positive_indels)),
group=set,
color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw()
+ coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
ggsave(paste(prefix, ".indels.png", sep=""), height=6, width=9)
}
}
# new versions
if (true_snps>0) {
ggplot(subset(rocsnps, set != truthset),
aes(FPR,
TPR,
group=set,
color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw() + coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
ggsave(paste(prefix, ".snps.png", sep=""), height=6, width=9)
}
if (true_indels>0) {
ggplot(subset(rocindels, set != truthset),
aes(FPR,
TPR,
group=set,
color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw() + coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
ggsave(paste(prefix, ".indels.png", sep=""), height=6, width=9)
}
if (true_indels>0 && true_snps>0) {
(
ggplot(subset(rbind(rocsnps,rocindels), set != truthset),
aes(FPR,
TPR,
group=set,
color=set))
+ scale_x_continuous("false positive rate")
+ scale_y_continuous("true positive rate")
+ geom_path()
+ theme_bw()
+ coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
+ facet_grid(type ~ .)
+ theme(panel.margin = unit(1, "lines"))
)
ggsave(paste(prefix, ".both.png", sep=""), height=5, width=5)
}