-
Notifications
You must be signed in to change notification settings - Fork 4
/
read_QX.R
88 lines (81 loc) · 4.1 KB
/
read_QX.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
## Function to read the raw fluorescence output files from the Biorad QX200
## digital PCR platform. Files can be exported by choosing options>export amplitude and cluster data
## The program will create one .csv file per reaction (well). All files
## should be placed into one folder. The latter's path (eg. "D:/Documents/dPCR Run")
## is submitted as a string to the function below.
## The QX .csv files should be the ONLY .csv files in the directory.
## Otherwise errors will occur!
# Updates will appear at: github.com/Gromgorgel
## Input:
#‘directory’ is a character vector of length 1 indicating the location of the CSV files
#`nr.r‘ is an optional number indicating the number of rows in the matrix (missing values will be padded with NA)
# (if nr.r = NA, the largest number of droplets found will be used)
# autoname logical. If TRUE the well names are collected from the csv files
# and used as column names. Only works if there are 96 or less csv files
# If there are more, a warning message will be reported. Problems
# may occur if additional underscores ("_") are present in the name of the run/experiment.
## Output:
# The function creates a list containing two matrices with fluorescence
# readings (one for each channel: $Ch1 and $Ch2). The matrix has as many columns
# as there are samples. The number of rows can be either set (max: approx 23500)
# otherwise, the number of rows equals the largest number of readings found.
# Missing values are reported as NA.
read.QX <- function(directory, nr.r = NA, autoname = TRUE){
##Prepping to read
bup <- getwd() #save current work directory
setwd(directory) #go to data directory
##step one: read the files required from the directory
d.max <- round(20/0.00085, digits = -1) *1.275 #maximum number of droplet reads possible, add 27.5% extra as some reactions have been yielding more droplest than possible
if(isTRUE(nr.r > d.max)){ #in case someone wants more padding than droplets are possible
message("this is not a good idea")
message(paste("nr.r reduced to ",d.max))
nr.r <- d.max
}
r.max <- 0 #highest number of droplets found in this run
files <- Sys.glob("*.csv") #save all filenames
output.Ch1 <- matrix(, nrow = d.max, ncol = length(files)) #generate output file for channel 1
output.Ch2 <- matrix(nrow = d.max, ncol = length(files)) #generate output file for channel 2
for(i in seq_along(files)){ #read each file, add variable zero padding to max size and store in matrix
temp <- read.csv(files[i], skip = 1, header = F) #read csv file
n.dr <- dim(temp)[1] #check how many droplets were generated in this run
#save data into output matrices
output.Ch1[1:n.dr, i] <- temp[,1]
output.Ch2[1:n.dr, i] <- temp[,2]
#check if current number of droplets is higher the run max
if(n.dr > r.max){
r.max <- n.dr #we have a new winner
}
}
##step two (optional): extract colnames from files
if(isTRUE(autoname)){
wells <- vector()
for(i in seq_along(files)){ #extract colnames from each file
wells <- append(wells, sub("?.*_(.*?)_.*","\\1",files[i]))
}
#check for errors and report:
if(length(wells) > 96 | length(wells) > length(files)){
if(length(wells) > 96){
message("more than 96 names found, switching off autoname")
}else{
message("more names than columns found, switching off autoname")
}
}else{ # add names to matrices
colnames(output.Ch1) <- wells
colnames(output.Ch2) <- wells
}
}
##step three: trimming
if(isTRUE(nr.r > r.max)){ #in this case we leave padding
output.Ch1 <- output.Ch1[1:nr.r, ]
output.Ch2 <- output.Ch2[1:nr.r, ]
}else{
output.Ch1 <- output.Ch1[1:r.max, ]
output.Ch2 <- output.Ch2[1:r.max, ]
if(isTRUE(nr.r < r.max)){
message("more droplets than expected")
message(paste("nr.r increased to ",r.max))}
}
##step four: output
setwd(bup) #return to previous wd
return(list(Ch1 = output.Ch1, Ch2 = output.Ch2))
}