-
Notifications
You must be signed in to change notification settings - Fork 4
/
Shiny Cloud v1-00.R
641 lines (597 loc) · 34.5 KB
/
Shiny Cloud v1-00.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
library(shiny)
require(SuppDists)#required for the 'moments()' function
require(MASS) #required for the 'lqs()' function
library(gridExtra)
require(ggplot2)
########################################################
###### Function repository##############################
########################################################
###### plotting for Shiny ##############################
########################################################
ddplot <- function(drop, ...){
drp <- na.omit(drop)
temp <- sample(c(1:length(drop)), size = length(drop), replace = F)
plot(drop ~ temp, col = rgb(123, 168, 194, maxColorValue = 255), bty = "L", pch = 20 , xlab = "observation", ylab = "FU", ylim = c(min(drop), max(drop)), ...)
}# END of ddplot ########################################################
poplotter <- function(drop, piek, info, manthre, ...){
fail <- info[1]
pops <- info[2]
tresh <- info[3]
# check for threshold consistency
if(is.na(tresh) & !is.na(info[4])){
tresh <- info[4]
}else if(is.na(tresh) & is.na(info[4])){
tresh <- 0 # for shiny to produce output, tresh cannot be NA
}
# check for manual threshold
if(!is.na(manthre)){
tresh <- manthre
}
drp1 <- na.omit(drop)
dfdrops <- data.frame(FU = drp1, positive = drp1 > tresh, category = 0)
tdrp <- length(drp1) # total number of droplets
# the following should only be calculated if there is more than 1 population
if(pops > 1){
#also define 'cerntainly pos', 'rain' and 'drift'
dfdrops[!dfdrops$positive, 'category'] <- 1 # negative droplets
dfdrops[dfdrops$FU > tresh & dfdrops$FU < piek[3, 2], 'category'] <- 2 #rain is the population of the clearance band (counted positive)
dfdrops[dfdrops$FU > piek[3, 2] & dfdrops$FU < piek[2, 2], 'category'] <- 3 #c-pos is the population of the pos band only
dfdrops[dfdrops$FU > piek[2, 2], 'category'] <- 4 #drift is population above the pos band (high-Fluorescence outliers)
}else{ # in case of only 1 population Resolution & so on should still be defined
dfdrops[!dfdrops$positive, 'category'] <- 1 # negative droplets
dfdrops[dfdrops$positive, 'category'] <- 3 # positive droplets
}# END pops > 1
#piik thing
dfpiik <- as.data.frame(t(piek))
dfpiik$category <- c(1,3)
# we also have to add a ranomd order to dfdrops to plot them randomly
dfdrops$ordr <- sample(c(1:nrow(dfdrops)), size = nrow(dfdrops), replace = F)
# unfortunately our plan for using geom_density does not work out well, both populations get scaled differently which gives a very misleading plot
# so we must make a separate plotting data frame:
dens <- density(drp1)
plotdf <- data.frame(x=dens$x, y=dens$y)
plotdf$dropsplit <- factor(findInterval(plotdf$x, tresh))
#1# kernel density
plot1 <- ggplot(plotdf, aes(x,y)) + geom_ribbon(aes(ymin=0, ymax=y, fill=dropsplit)) + geom_line() +
scale_fill_manual(labels = c("negative", "positive"), limits = c(0, 1), values = c('#F8766D', '#00BFC4'), name = "Population") +
labs(tag = "A", x = "FU", y = "density")
if(fail == 0){ #piik lines can only be added if piik is not NA
plot1 <- plot1 + geom_vline(data = dfpiik, aes(xintercept = x.FU), linetype = "dashed") +
geom_vline(data = dfpiik, aes(xintercept = outer.bounds), linetype = "dashed") +
geom_vline(data = dfpiik, aes(xintercept = inner.bounds), linetype = "dashed")
}
#2# droplet view
plot2 <- ggplot() + geom_point(data = dfdrops, aes(y = FU, x = ordr, colour = category)) +
scale_colour_gradientn (breaks = c(1, 2, 3, 4), colors = c("#F8766D", "#FFC425", "#00BFC4", "#0076C4"), labels = c("negative", "rain", "positive", "outlier"), limits = c(0.9,4.1), name = "Population") +
geom_hline(yintercept = tresh, linetype = "dashed") +
labs(tag = "B", x = "Observation")
### put on screen
grid.arrange(plot1, plot2, nrow = 1)
}# END of poplotter ########################################################
###### Adapted internal functions ######################
########################################################
s.funk <- function(k){return(3.8 + 0.35 * log(k) + 0.045 * log(k)^2 +0.75)} # kurtosis to spread
findpeaks <- function(vec, bw = 1, trim = 0.01, ...){ #where bw = is box width, setting the sensitivity of the search
### initiate vectors
pos.x.max <- NULL ; pos.x.min <- 1
###Start of for loop: we walk down the vector with a window of size "bw"
for(i in 1:(length(vec)-1)){
#check if we have reached the end of the vector
if((i+1+bw)>length(vec)){sup.stop <- length(vec)}else{sup.stop <- i+1+bw}
#check if we are at beginning of the vector
if((i-bw) < 1){inf.stop <- 1}else{inf.stop <- i-bw}
#select window in two parts: values beyond i (superior), and values before i (inferior)
subset.sup <- vec[(i+1):sup.stop]
subset.inf <- vec[inf.stop:(i-1)]
##############################################################
is.max <- sum(subset.inf > vec[i]) == 0 #are ALL trailing data smaller than i?
is.nomin <- sum(subset.sup > vec[i]) == 0 #are ALL leading data smaller than i?
no.max <- sum(subset.inf > vec[i]) == length(subset.inf) #are ALL trailing data larger than i?
no.nomin <- sum(subset.sup > vec[i]) == length(subset.sup) #are ALL leading data larger than i?
##############################################################
#a maximum is found if all data before and after i are smaller than i
if(is.max & is.nomin){ pos.x.max <- c(pos.x.max, i) }#
#a minimum is found if all data before and after i are smaller than i
if(no.max & no.nomin){ pos.x.min <- c(pos.x.min, i) }#
}#end of for loop
### trim off tiny peaks
pos.x.max <- pos.x.max[vec[pos.x.max] > max(vec[pos.x.max])*trim]
###Consolidate peak-valley matrix # 2 indicates max, 1 indicates valley
pvmat <- matrix(c(pos.x.max, pos.x.min, rep(2, length(pos.x.max)), rep(1, length(pos.x.min))),
nrow = 2, ncol = length(c(pos.x.max, pos.x.min)), byrow = T)
pvmat <- pvmat[, order(pvmat[1,])] # order
# we want n + 1 valeys for n peaks (start & then one valley after avery peak)
# if there are multiple possibilities we want the valley centered between 2 peaks
check <- 1 ; j <- 1 ; busy <- TRUE
while(busy){ # loop over pv matrix, since we'll be adding columns to the matrix we use a while loop
if(pvmat[2, j] != check){ # we do not find what we expect!
if(pvmat[2, j] == 1){ # previous was also valley, merge both valleys
# move position to halfway between the two valeys, except if j = 2
if(j != 2){ # the first valley should always be the vector start
pvmat[1, j - 1] <- pvmat[1, j-1] + floor((pvmat[1, j] - pvmat[1, j-1])/2) }#
# remove current position (we've updated the previous valley)
pvmat <- pvmat[, -j]
# since we removed the col we should not advance j
}else if(pvmat[2, j] == 2){ # previous was also peak, add a valley
# use minimum value between this peak and the previous one
pvmat <- cbind(pvmat[, 1:(j-1)],
matrix(c(pvmat[1, j-1] + which.min(vec[(pvmat[1, j-1] + 1):(pvmat[1, j] - 1)]), 1),
ncol = 1),pvmat[, -c(1:(j-1))])
j <- j + 2
}# END check
}else{ # order is respected, switch check & advance
check <- c(2 ,1)[check]
j <- j + 1
}# END if != check
if(j > ncol(pvmat)){ busy <- F }# check if we reached the end of the matrix
}# END while
###Final check: last element shoud be a valley, if so set to vector end, if not add it
if(tail(pvmat[2,], 1) == 1){
pvmat[1, ncol(pvmat)] <- length(vec)
}else{
pvmat <- cbind(pvmat, matrix(c(length(vec), 1), ncol = 1))
}#END if tail
###Output
pea <- pvmat[1, pvmat[2, ] == 2]
val <- pvmat[1, pvmat[2, ] == 1]
return(list("peaks" = rbind(pea, vec[pea]), "valleys" = rbind(val, vec[val])))
}#End of peakfinder function########################################################
findthresh <- function(drops, silent, threshold, neg.ref,...){ #
skip <- 0
# create some values for the internal function to update (have to be internally defined for shiny!)
fail <- 0 #create failure tracker
pops <- 0 #create population tracker
tresh <- NA
tresh.bup1 <- NA
badluck <- rep(FALSE, times = 7)
##################################################################################
# KERNEL DENSITY SPLIT
##################################################################################
#be sure bandwidth for kernel density is at least 50
temp <- bw.nrd0(drops)
if(temp < 50){
temp <- 50
}
#kernel density estimation
krn <- density(drops, bw = temp)
krn <- rbind(krn$y, krn$x)
rownames(krn) <- c("y", "x")
#we use findpeaks with a high bandwith to single out the populations
piek <- findpeaks(krn[1, ], bw=20)
##################################################################################
##First major control point: we only proceed if there are no NA
if(!any(is.na(unlist(piek)))){ ##Control LVL 1
##################################################################################
if(length(piek$peaks[1,]) == 1){ # only one majore peak was found, we'll check if we can deduce if it's pos or neg
#Since there still may be few negatives / positives that got smoothed out.
#We should be able to detect them as 'outliers' from the single population
#therefore we calculate z-scores for all droplets
zs <- (drops - median(drops))/mad(drops)
s1 <- s.funk(abs(moments(drops)[4]))
# check for outliers and check if they are higher or lower to decide if the pop is pos or neg
# if we have BOTH positive and negative outliers we cannot make a meaningful decision without user input
# in the latter case, we'll just give up. Also, we must make sure there is more than one outlier,
# otherwise none of the below makes sense. In fact, we'll demand at least 3.
outzs <- abs(zs) > (1.5 * s1)
# pos and neg outzs may exist, we'll choose one. Either based on the neg.ref, or if it is NA we use
# decide based on the number of observations. The latter is not an ideal approach, but it should
# work for 99% of the cases where this question pops up
if(!is.na(neg.ref)){
if(median(drops) > neg.ref){ # ALL positive, we focus on the lower outliers (possibly negative)
outzs[outzs & zs > 0] <- F # set all positive outzs to F
}else{
outzs[outzs & zs < 0] <- F # set all negative outzs to F
}#
}else{ # no neg.ref, we decide based on number of outliers
if(sum(zs[outzs] > 0) > sum(zs[outzs] < 0)){
#there are more positives
outzs[outzs & zs < 0] <- F # set all negative outzs to F
}else{
#there are more negatives
outzs[outzs & zs > 0] <- F # set all positive outzs to F
}# END if-else
}# END if-else
if(length(zs[outzs]) > 2){
#we have outliers! we'll need to update piek so we can proceed as normal
if(all(zs[outzs] > 0)){ #outliers have higher fluorescence and are thus positive
popmean <- mean(drops[outzs])
#retrieve position in krn
popkrn <- which.min((krn[2, ] - popmean)^2)
#find lowest spot between the 2 peaks
minkrn <- piek$peaks[1,1] + 1 + which.min(krn[1, (piek$peaks[1,1] + 1):(popkrn - 1)])
#update pieks & valleys
piek$peaks <- cbind(piek$peaks, c(popkrn, krn[1, popkrn]))
piek$valleys <- cbind(piek$valleys[,1], c(minkrn, krn[1, minkrn]), piek$valleys[,2])
}else{ #outliers have lower fluorescence and are thus negative
popmean <- mean(drops[outzs])
#retrieve position in krn
popkrn <- which.min((krn[2, ] - popmean)^2)
#find lowest spot between the 2 peaks
minkrn <- popkrn + 1 + which.min(krn[1, (popkrn + 1):(piek$peaks[1,1] - 1)])
#update pieks & valleys
piek$peaks <- cbind(c(popkrn, krn[1, popkrn]), piek$peaks)
piek$valleys <- cbind(piek$valleys[,1], c(minkrn, krn[1, minkrn]), piek$valleys[,2])
}
### we have no outliers, but at least the threshold allows us to see if the drops are all pos or neg
#############################
}else if(!is.na(threshold)){
pops <- 1
tresh <- threshold
# we should construct a makeshift piik matrix so that we can still return an output (and produce some plots)
piik <- matrix(c(threshold - 100, threshold + 100, min(drops) - 100, max(drops) + 100, threshold -0.1, threshold + 0.1),
nrow = 3, ncol = 2, byrow = T)
rownames(piik) <- c("x.FU", "outer.bounds", "inner.bounds") # the plotting dataframe needs names!
# we should, however, not attempt further analysis of the data or we screw up our ersatz piik
skip <- 1
### we have no outliers, but at least the neg.ref may allows us to see if the drops are all pos or neg
#############################
}else if(!is.na(neg.ref)){
pops <- 1
if(median(drops) > neg.ref){ # ALL positive, we need at least 1 negative
tresh <- mean(tail(sort(drops, decreasing = T), 2)) # put threshold between two lowest droplets
# we should construct a makeshift piik matrix so that we can still return an output (and produce some plots)
piik <- matrix(c(min(drops), median(drops), min(drops) - 100, max(drops) + 100, tresh -0.1, tresh + 0.1),
nrow = 3, ncol = 2, byrow = T)
}else{ # all negative, zero positives is perfectly ok, we set the threshold above the max droplet
tresh <- max(drops) + 2 * mad(drops)
# we should construct a makeshift piik matrix so that we can still return an output (and produce some plots)
piik <- matrix(c(median(drops), tresh + 50, min(drops) - 100, tresh + 100, tresh - 0.1, tresh + 0.1),
nrow = 3, ncol = 2, byrow = T)
rownames(piik) <- c("x.FU", "outer.bounds", "inner.bounds") # the plotting dataframe needs names!
} # END if-else
# we should, however, not attempt further analysis of the data or we screw up our ersatz piik
skip <- 1
### no outliers, no threshold: we give up
#############################
}else{
piik <- matrix(NA, ncol = 2, nrow = 3) # make empty piik matrix
rownames(piik) <- c("x.FU", "outer.bounds", "inner.bounds") # the plotting dataframe needs names!
pops <- 1
fail <- 1 # notify failure
skip <- 1 # make sure we skip
badluck[2] <- TRUE
} #end of no outlier case
}# end of piek == 1 check
## #check if we should continue or give up#########################################
if(fail == 0 & skip == 0){#####################################################################
if(length(piek$peaks[1,]) >= 3){ #give a warning if we find more than 2 populations
#mark warning
badluck[3] <- TRUE
}# END if peaks >= 3
#*universal* population delimiter
################################
#set the number of populations
pops <- length(piek$peaks[1,])
# set the peak limits using valleys defined by peakfinder , row 1: limits low FU peak, row 2: limits high FU peak
pea.lim <- matrix(krn[2, c(head(piek$valleys[1,], 2), tail(piek$valleys[1,], 2))], nrow = 2, byrow = T) # in FU
#get started on the main population matrix
piik <- matrix(c(krn[2, piek$peaks[1, range(seq_along(piek$peaks[1,]))]], # FU of first and last peak
pea.lim[1,1], pea.lim[2,2], pea.lim[1,2], pea.lim[2,1]),ncol = 2, byrow = T)
# for clarity we add row names:
rownames(piik) <- c("x.FU", # first row is population center position in Fluorescence units
"outer.bounds",# second row is the outer limits (lowest and hightst FU over ALL)
"inner.bounds")# third row is the inner population bounds (between the populations)
# I know the order of the values in piik is not completely logical & I have no idea why i did it this way
# I suppose I had a valid reason when I first wrote this & I'll keep it this way for compatibility's sake
#allright, using this maximal delimination of the clouds we can calculate more accurate values
#by iterating the update process three times (i.e. use current estimate to calculate pos and neg,
# then recaculte sd and re-delimite bands). Three iterations is generaly enough to reach
# stability in the first decimal place.
for ( i in 1 : 3 ) {
#split in positive and negatives
pdrp <- drops[drops >= piik[3, 2] & drops <= piik[2, 2]]
ndrp <- drops[drops >= piik[2, 1] & drops <= piik[3, 1]]
#update s multiplier
s1 <- s.funk(abs(moments(ndrp)[4]))
s2 <- s.funk(abs(moments(pdrp)[4]))
#back-up
piik.bup <- piik
#update
piik[2, 1] <- piik[1, 1] - mad(ndrp) * s1 #leftmost extreme (start of neg band)
piik[2, 2] <- piik[1, 2] + mad(pdrp) * s2 #rigthmost extreme (end of pos band)
piik[3, 1] <- piik[1, 1] + mad(ndrp) * s1 #first inner extreme (end of the neg band)
piik[3, 2] <- piik[1, 2] - mad(pdrp) * s2 #second inner extreme (start of the pos band)
# check if we didn't screw up (which may happen with very few pos/neg)
if(any(is.na(piik))){
#maybe a warning?
piik <- piik.bup # error error, restore
break # stop optimization
}# END NA check
}#END of the iterative update
#If there is a threshold specified we'll use that, otherwise we calculate our own
if(is.na(threshold)){
#We now calculate the threshold for counting positives
tresh <- piik[1, 1] + (s1 + 1/2 * s1) * mad(ndrp)
#should the threshold fail, we provide a backup
tresh.bup1 <- pea.lim[1, 2]
}else{
tresh <- threshold
}#END if is.na(threshold)
# a last check before we return the output:
##############################################################################
###Control LVL 2 : a problem related to standard deviations is overlapping bounderies
#check : upper neg boundary is situated above lower pos boundary?
if(piik[3, 1] > piik[3, 2]){##Control LVL 5
badluck[4] <- TRUE
#we can use the 'valleys' returned by findpeaks to draw new population bounderies
piik[3, 1] <- pea.lim[1, 2]
piik[3, 2] <- pea.lim[2, 1]
}##Control LVL 2 END
} #END of fail== 0 & skip == 0
################################
# END of the *universal* population delimiter
##############################################################################
}else{ ##Control LVL 1
# to arrive here, piek must contain NA values. All is lost, throw an error and set failure to 1.
badluck[1] <- TRUE
piik <- matrix(NA, ncol = 2, nrow = 3)
fail <- 1 #notify failure
}##Control LVL 1 END
### extra bit for shiny, as this code cannot fit in valkyr
if(fail == 0){ ##Control LVL 2
##################################################################################
##third to fifth control points: checks for stability reasons
##################################################################################
#in case of failure (tresh is NA) we fall back on the backup threshold
if(is.na(tresh)){##Control LVL 3
badluck[6] <- TRUE
tresh <- tresh.bup1
## if tresh is NA the piik may be NA as well, check & redo all the calculations:
if(any(is.na(piik))){
piik <- c(median(ndrp),median(pdrp))
#Expand piik to take all values
piik <- rbind (piik, matrix(c(min(ndrp),max(pdrp),max(ndrp),min(pdrp)),nrow=2,ncol=2, byrow = T))
}# END piik rescue
}##Control LVL 3 END
##############################
# the next control is skipped if a threshold is specified (we don't want it to be overruled, even if it's wrong)
if(is.na(threshold)){
##############################
#excessive rain can make the standard deviations go haywire
#we build in this simple check to make sure the treshold is not placed into the positive band
if(tresh > piik[3, 2]){ ##Control LVL 4
badluck[7] <- TRUE
tresh <- tresh.bup1
}##Control LVL 4 END
}# end if(is.na(threshold))
}##Control LVL 2 END
##############################################################################
sumar <- c(fail, pops, tresh, tresh.bup1)
names(sumar) <- c("fail", "pops", "tresh", "tbup")
return(list(piik = piik, infos = sumar, bad = badluck))
}#End of findtresh function########################################################
valkyr <- function(zealot, drops, dVol, sVol, manthre,... ){# function that calculates the values for output from the treshold set by treshfinder
# split zealot into its components so we can re-use the same code as in the original cloudy algortihm
# (it's not lazy, it will make future updates more straightforward, we can just copy paste everything!)
drp1 <- na.omit(drops)
piik <- zealot$piik
fail <- zealot$infos[1]
pops <- zealot$infos[2]
# we should only continue if fail == 0, otherwise the entire script will fail
if(fail == 0){
tresh <- zealot$infos[3]
tresh.bup1 <- zealot$infos[4]
# check for threshold consistency
if(is.na(tresh) & !is.na(zealot$infos[4])){
tresh <- zealot$infos[4]
}else if(is.na(tresh) & is.na(zealot$infos[4])){
tresh <- 0 # for shiny to produce output, tresh cannot be NA
}#
# check for manual threshold
if(!is.na(manthre)){
tresh <- manthre
}#
badluck <- zealot$bad
# use piik to define the populations
pospop <- drp1 >= piik[3, 2] & drp1 <= piik[2, 2]
negpop <- drp1 >= piik[2, 1] & drp1 <= piik[3, 1]
pdrp <- drp1[pospop]
ndrp <- drp1[negpop]
## beyond this point is the 'copy-paste' of cloudy code ######################
##################################################################################
#we can now start the trivial calculations:
##################################################################################
#count the raindrops (i.e. the population of the 'clearance band' between the positive and negative band)
#again to avoid including negatives in the rain we use the treshold in stead of the peak base
#rain <- length(which(drp > piik[3, 1] & drp < piik[3, 2]))
# we do not calculate rain in case of 3 or more populations
# the middle populations end up as 'rain' and give a biased metric
# Since we allow for manual threshold placement it is now possible that
# reactions with 1 population arrive at this point. However,
# we also cannot calculate the rain if there is only 1 population
if(pops >= 3 | pops == 1){
rain <- NA
}else{
rain <- sum(drp1 > tresh & drp1 < piik[3, 2])
}#
# make a dataframe to label the positive and negatives so we can apply colours on the plot
dfdrops <- data.frame(FU = drp1, positive = drp1 > tresh, category = 0)
tdrp <- length(drp1) # total number of droplets
# the following should only be calculated if there is more than 1 population
if(pops > 1){
#also define 'cerntainly pos', 'rain' and 'drift'
dfdrops[!dfdrops$positive, 'category'] <- 1 # negative droplets
dfdrops[dfdrops$FU > tresh & dfdrops$FU < piik[3, 2], 'category'] <- 2 #rain is the population of the clearance band (counted positive)
dfdrops[dfdrops$FU > piik[3, 2] & dfdrops$FU < piik[2, 2], 'category'] <- 3 #c-pos is the population of the pos band only
dfdrops[dfdrops$FU > piik[2, 2], 'category'] <- 4 #drift is population above the pos band (high-Fluorescence outliers)
###Calculation of Performance parameters
#1# Resolution
Resol <- 2 * (piik[1, 2] - piik[1, 1]) / ((piik[3, 1] - piik[2, 1]) + (piik[2, 2] - piik[3, 2]))
}else{ # in case of only 1 population Resolution & so on should still be defined
Resol <- NA
dfdrops[!dfdrops$positive, 'category'] <- 1 # negative droplets
dfdrops[dfdrops$positive, 'category'] <- 3 # positive droplets
}# END pops > 1
#2# percentage rain
pRain <- rain / tdrp
#3# Compartmentalization
Comp <- tdrp * dVol * 0.001 / sVol
###Calculation of Lambda and confidence bounds
lambda <- -log(sum(!dfdrops$positive)/tdrp)
lamlo <- lambda - 1.96 * sqrt((tdrp - sum(!dfdrops$positive))/(tdrp * sum(!dfdrops$positive))) #lower 95% confidence bound for lambda 1 (cpd)
lamhi <- lambda + 1.96 * sqrt((tdrp - sum(!dfdrops$positive))/(tdrp * sum(!dfdrops$positive))) #upper 95% confidence bound for lambda 1 (cpd)
lambs <- c(lambda, lamhi, lamlo)
#total number of targets present in droplets analysed
targets <- tdrp * lambs
#total number of targets in sample volume
targall <- lambs * (sVol/(dVol * 0.001))
###gather and name all output
names(targets) <- c("targets", "upper", "lower")
names(targall) <- c("targ.in.sVol", "upper", "lower")
names(lambs) <- c("lambda", "upper", "lower")
drops <- c(sum(dfdrops$positive), sum(!dfdrops$positive), rain)
names(drops) <- c("positive", "negative", "rain")
perform <- c(Resol, pRain, Comp)
names(perform) <- c("resolution", "%rain", "%comparted")
names(pops) <- "populations"
names(tresh) <- "threshold"
##################################################################################
# actual output ##################################
##################################################################################
return(c(targall, lambs, perform, drops, pops, tresh))
}else{ # fail == 1, we return a lot of NA & zeroes
return(c(rep(NA, times = 6), 0, NA, length(drp1) * dVol * 0.001 / sVol, length(drp1), 0, 0, pops, 0))
}##
#return(list("targets.in.sVol" = targall, "lambda" = lambs, "performance" = perform, "droplets" = drops, "populations" = pops, "threshold" = tresh))
}#End of valkyr function########################################################
# pucblic adress messages
public.address <- c("WARNING: something is not right, please check data <br/>", #1
"WARNING: only one population detected, returning NA <br/>", #2
"WARNING: multiple population detected <br/>", #3
"WARNING!: population boundary overlap. The boundaries were rewritten and are not based on population parameters <br/>", #4
"Rotation seems to have screwed up! ...restoring droplets... <br/>", #5
"WARNING!: threshold placement failed. Check the data, an arbitrary threshold was set <br/>", #6
"WARNING!: threshold inside positive cloud. Algortihm forced a lower threshol which is not based on population parameters <br/>" #7
)##
########################################################
###### App Code ########################################
########################################################
# Define UI for data upload app ----
ui <- fluidPage(
titlePanel("welcome to Cloudy"),
#### tab 1
#tabPanel("Uploading Files",
# Sidebar layout with input and output definitions ----
sidebarLayout(
### Sidebar panel for inputs ----
sidebarPanel(
# Input: Select a file ----
fileInput("file1", "Choose CSV File",
multiple = FALSE,
accept = c("text/csv",
"text/comma-separated-values,text/plain",
".csv")),
## Horizontal line ----
tags$hr(),
# Input: Select Channel ----
numericInput(inputId = "Ch", label = "Channel:",
value = 1, min = 1, max = 2),
## Horizontal line ----
tags$hr(),
# Input: Select variables ----
sliderInput(inputId = "dVol", label = "droplet volume (nl):",
value = 0.85, min = 0.1, max = 1),
sliderInput(inputId = "sVol", label = "sample volume (ul):",
value = 20, min = 5, max = 25),
## Horizontal line ----
tags$hr(),
numericInput(inputId = "thresh", label = "manual threshold:",
value = NA, min = 1000, max = 25000),
#numericInput(inputId = "Nref", label = "neg.ref:",
# value = NA, min = 5000, max = 25000),
width = 3),
### Main panel for displaying outputs ----
mainPanel(
# Output: Data file ----
tabsetPanel(
#### tab 1
tabPanel("help",
h3("getting started"),
p("Use the 'browse' button to select a *.csv file containing digital droplet data. The algorithm is designed for use with data coming from the Biorad QX200 platform. It will probably choke on any other format."),
p("The 'inspect data' tab allows you to preview the data. You can use it to select the correct 'Channel' (either 1 or 2) if you are unsure which one contains the correct values"),
p("The other tabs contain the algorithm output. Please be patient as the plots may take a while to load. The 'plot' tab will also display some warning messages (if appropriate, most of the time there shouldn't be any)"),
p("I have made it so you can mess about with the further inputs. The first two provide the partition volume (in nanoliter) and the sample volume (in microliter). Changing these will affect the concentrations calculated, but will not affect the threshold setting or the calculation of lambda."),
p("Lastly, there is 'manual threshold'. If provided, a manual threshold will override the one calculated by the algorithm, this will affect a lot of the output values, It's quite possible to break the plots if you push it to silly values. Leave blank if you want everything to remain fully automatic.")
), #, tabpanel END
#### tab 1
tabPanel("Inspect data",
fluidRow(
column(5, tableOutput("contents")),
column(7, plotOutput(outputId = "ddplot"))
)), #fluidrow, tabpanel END
#### tab 2
tabPanel("plot", #""),
fluidRow(
column(12,plotOutput(outputId = "popplot"))
), #fluidrow1 END
fluidRow(
column(12, htmlOutput(outputId = "badluck"))
)), #fluidrow2, tabpanel END
#### tab 3
tabPanel("Table",
fluidRow(
column(12, tableOutput(outputId = "restable1"))
), #fluidrow1 END
fluidRow(
column(6, tableOutput(outputId = "restable2")),
column(6, tableOutput(outputId = "restable3"))
),#fluidrow2 END
fluidRow(
h3("A Note on Performance"),
p("For the performance parameters to make sense, they should be measured at a lambda of 0.7 (1:1 positive:negative partitions). Especially the resolution, which assumes peaks of equal height and approx. gaussian in shape, may be biased otherwise. Heavily tailed distributions will affect the parameters as well."),
p("This does not mean the measurement or analysis result is incorrect. That depends solely on the ability to distinguish positives from negatives, but the performance parameters may not be meaningful")
)) #fluidrow3, tabpanel END
)# tabsetpanel END
)# mainPanel END
)# sidebarLayout END
)# fluidPage END
server <- function(input, output){# server functions determines how inputs are converted to outputs
# server function rules:
# 1 # output objects are stored in the output list: output$something <- something
# output elements in the ui will look into this list to find their target. eg. plotOutput("hist") will look for output$hist
# 2 # what is saved into output should be built with a render function
# e.g. output$hist <- renderPlot()
# 3 # use input values with the inputId
# e.g. input$num after numericInput(inputId = "num")
# read user selected file
df <- reactive({req(input$file1)
read.csv(input$file1$datapath, header = T) })#
# extract droplet FU readings from file according to Ch chosen by user
drp <- reactive({req(input$file1)
df()[, input$Ch] })#
# call to treshfinder function
piki <- reactive({req(input$file1)
findthresh(drp(), silent = T, threshold = input$thresh, neg.ref = NA) })#
# call valkyr to make the calculations
res <- reactive({req(input$file1)
valkyr(zealot = piki(), drops = drp(), dVol = input$dVol, sVol = input$sVol, manthre = input$thresh) })#
# make some preview output for the selected channel (droplet table header, droplet plot)
output$contents <- renderTable({
req(input$file1)
# display file head for inspection
head(df(), n = 10)})
output$ddplot <- renderPlot({
req(input$file1)
ddplot(drp())
})# END renderplot
# make output plot (population plot)
output$popplot <- renderPlot({
req(input$file1)
poplotter(drp(), piki()$piik, piki()$infos, input$thresh)
})# END renderplot
# make output tables
output$restable1 <- renderTable({
data.frame('parameter' = c("lambda", "targ.in.sVol"), 'value'= res()[c(4,1)], 'upper.limit' = res()[c(5,2)], 'lower.limit' = res()[c(6,3)])
}, caption = "Sample measurements")# END rendertable1
output$restable2 <- renderTable({
data.frame('parameter' = c("threshold", "Resolution", "%rain", "%comparted", "populations"), 'value'= res()[c(14, 7:9, 13)])
}, caption = "Performance parameters")# END rendertable2
output$restable3 <- renderTable({
data.frame('count' = c("total", "positive", "negative", "rain"), 'value'= c((res()[10] + res()[11]),res()[10:12]))
}, caption = 'droplet counts')# END rendertable3
# make output warning messages
output$badluck <- renderUI({
HTML(public.address[piki()$bad])
})# END rendertext
}#END server
shinyApp(ui = ui, server = server)#