/
PTR_Calculation_Graph.R
executable file
·147 lines (123 loc) · 4.58 KB
/
PTR_Calculation_Graph.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env Rscript
library("argparser")
library("Rsamtools")
library("random")
library("tidyverse")
library("dplyr")
library("reshape2")
library("qqman")
library("lattice")
library("ggplot2")
library("tictoc")
library("mgcv")
process_arguments <- function(){
p <- arg_parser(paste("Calculate PTR and the graph of coverage with bam file"))
# Positional arguments
p <- add_argument(p, "input",
help = paste("This is the input"),
type = "character")
# Optional arguments
p <- add_argument(p, "--window_size",
help = paste("This is the window_size"),
type = "Integer",
default = 10000)
# Optional arguments
p <- add_argument(p, "--step_size",
help = paste("This is the step_size"),
type = "Integer",
default = 100)
# Optional arguments
p <- add_argument(p, "--output",
help = paste("This is the output"),
type = "character",
default = "Coverage_Graph.jpg")
p <- add_argument(p, "--name",
help = "name of original file",
type = "character")
# Read arguments
cat("Processing arguments...\n")
args <- parse_args(p)
# Process arguments
return(args)
}
bam_into_PTR_and_graph <- function(root_path, w_size = 10000,
s_size = 100,
output = "",
name = ""){
bam_file <- scanBam(root_path, param = ScanBamParam(what=scanBamWhat()))
filtered_file <- data.frame(bam_file[[1]]$rname,bam_file[[1]]$pos)
filtered_bam <- na.omit(filtered_file)
sorted_bam_file <- filtered_bam[order(filtered_bam$bam_file..1...rname),]
# Set variables
window_size <- w_size
step_size <- s_size
# group with unique variables
group <- sorted_bam_file$bam_file..1...rname
uni_group <- unique(group)
# position in terms of "max_pos" in "sorted_bam_file"
max_pos <- matrix(nrow = length(uni_group), ncol = 1)
for (i in 1:length(uni_group)){
match_rname = which(sorted_bam_file$bam_file..1...rname == uni_group[i])
max_pos[i] = max(sorted_bam_file$bam_file..1...pos[match_rname])
}
column_num <- sum(max_pos)
# result is the final format of the matrix for xyplot
result = matrix(nrow = 0, ncol = 3)
for (i in 1:length(max_pos)){
index_temp_reads_pos <- which(sorted_bam_file$bam_file..1...rname == sorted_bam_file$bam_file..1...rname[i])
temp_reads_pos <- sorted_bam_file$bam_file..1...pos[index_temp_reads_pos]
if (max_pos[i] - window_size + 1 > 0){
temp <- matrix(nrow = max_pos[i] - window_size + 1, ncol = 3)
temp[,1] <- uni_group[i]
temp[,2] <- (1:(max_pos[i] - window_size + 1))
for (j in seq(1,(max_pos[i] - window_size + 1),step_size)){
temp[j,3] <- sum(between(sorted_bam_file$bam_file..1...pos,j,j + window_size))
}
}
else{
next
}
result <- rbind(result,temp)
}
result <- na.omit(result)
test <- data.frame(result[,2:3])
fitness_span <- 0.1
x <- test[,1]
y <- test[,2]
l <- loess(formula = y ~ x, data = test, span = fitness_span)
deri <- diff(l$fitted) / diff(x)
# Find absolute extrema
extrema <- matrix(ncol = 3)
colnames(extrema) <- c("x", "y", "multi_deri")
for (i in seq(1:length(x)-1)[2:length(x)-2]){
if (deri[i] * deri[i + 1] < -10 ** -21){
temp <- matrix(nrow = 1, ncol = 3)
temp[1,1] <- x[i]
temp[1,2] <- l$fitted[i]
temp[1,3] <- deri[i] * deri[i + 1]
extrema <- rbind(extrema, temp)
}
}
extrema <- na.omit(extrema)
Peak <- max(as.vector(extrema[,2]))
Trough <- min(as.vector(extrema[,2]))
PTR <- Peak / Trough
# Highlight the peak and trough
x_peak <- extrema[which(as.vector(extrema[,2]) == Peak), 1]
x_trough <- extrema[which(as.vector(extrema[,2]) == Trough), 1]
# Output
name = output
new_name <- sub(pattern = "[.]fastq$", replacement = "", x = basename(name))
qname <- bam_file[[1]]$qname[1]
qname <- sub(pattern = "[.][0-9]+$", replacement = "", x = qname)
filename <- paste0(qname, ".txt")
write.table(PTR, file = filename)
tiff(file = paste0(qname, "_", "Graph.tiff"))
plot(y ~ x, pch = ".")
lines(l$fitted[10000:length(l$fitted)-10000] ~ x[10000:length(l$fitted)-10000], col = "red")
abline(h = Peak, v = x_peak, col = "blue")
abline(h = Trough, v = x_trough, col = "blue")
dev.off()
}
args <- process_arguments()
bam_into_PTR_and_graph(args$input, args$window_size, args$step_size, args$output)