/
PTR_Calculation_Graph_Multiple_Input.R
139 lines (117 loc) · 4.49 KB
/
PTR_Calculation_Graph_Multiple_Input.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env Rscript
# install.packages("argparser")
tictoc::tic("Total Time")
library("argparser")
library("Rsamtools")
library("random")
library("tidyverse")
library("dplyr")
library("reshape2")
library("qqman")
library("lattice")
library("ggplot2")
library("tictoc")
library("mgcv")
process_arguments <- function(){
p <- arg_parser(paste("Calculate PTR and the graph of coverage with bam file"))
# Positional arguments
p <- add_argument(p, "input",
help = paste("This is the input"),
type = "character")
# Optional arguments
p <- add_argument(p, "--window_size",
help = paste("This is the window_size"),
type = "Integer",
default = 10000)
# Optional arguments
p <- add_argument(p, "--step_size",
help = paste("This is the step_size"),
type = "Integer",
default = 100)
# Optional arguments
p <- add_argument(p, "--output",
help = paste("This is the output"),
type = "character",
default = "Coverage_Graph.jpg")
# Read arguments
cat("Processing arguments...\n")
args <- parse_args(p)
# Process arguments
return(args)
}
bam_into_PTR_and_graph_multiple_input <- function(root_path, w_size = 10000, s_size = 100, output = "/Users/jerrypan/Desktop/Coverage_Graph.jpg"){
for (f in 1:length(root_path)){
bam_file <- scanBam(root_path[f], param = ScanBamParam(what=scanBamWhat()))
filtered_file <- data.frame(bam_file[[1]]$rname,bam_file[[1]]$pos)
filtered_bam <- na.omit(filtered_file)
sorted_bam_file <- filtered_bam[order(filtered_bam$bam_file..1...rname),]
# Set variables
window_size <- w_size
step_size <- s_size
# group with unique variables
group <- sorted_bam_file$bam_file..1...rname
uni_group <- unique(group)
# position in terms of "max_pos" in "sorted_bam_file"
max_pos <- matrix(nrow = length(uni_group), ncol = 1)
for (i in 1:length(uni_group)){
match_rname = which(sorted_bam_file$bam_file..1...rname == uni_group[i])
max_pos[i] = max(sorted_bam_file$bam_file..1...pos[match_rname])
}
column_num <- sum(max_pos)
# result is the final format of the matrix for xyplot
result = matrix(nrow = 0, ncol = 3)
for (i in 1:length(max_pos)){
index_temp_reads_pos <- which(sorted_bam_file$bam_file..1...rname == sorted_bam_file$bam_file..1...rname[i])
temp_reads_pos <- sorted_bam_file$bam_file..1...pos[index_temp_reads_pos]
if (max_pos[i] - window_size + 1 > 0){
temp <- matrix(nrow = max_pos[i] - window_size + 1, ncol = 3)
temp[,1] <- uni_group[i]
temp[,2] <- (1:(max_pos[i] - window_size + 1))
for (j in seq(1,(max_pos[i] - window_size + 1),step_size)){
temp[j,3] <- sum(between(sorted_bam_file$bam_file..1...pos,j,j + window_size))
}
}
else{
next
}
result <- rbind(result,temp)
}
result <- na.omit(result)
test <- data.frame(result[,2:3])
fitness_span <- 0.1
x <- test[,1]
y <- test[,2]
l <- loess(formula = y ~ x, data = test, span = fitness_span)
deri <- diff(l$fitted) / diff(x)
# Find absolute extrema
extrema <- matrix(ncol = 3)
colnames(extrema) <- c("x", "y", "multi_deri")
for (i in seq(1:length(x)-1)[2:length(x)-2]){
if (deri[i] * deri[i + 1] < -10 ** -21){
temp <- matrix(nrow = 1, ncol = 3)
temp[1,1] <- x[i]
temp[1,2] <- l$fitted[i]
temp[1,3] <- deri[i] * deri[i + 1]
extrema <- rbind(extrema, temp)
}
}
extrema <- na.omit(extrema)
Peak <- max(as.vector(extrema[,2]))
Trough <- min(as.vector(extrema[,2]))
PTR <- Peak / Trough
# Highlight the peak and trough
x_peak <- extrema[which(as.vector(extrema[,2]) == Peak), 1]
x_trough <- extrema[which(as.vector(extrema[,2]) == Trough), 1]
# Output
write.table(PTR, file = paste("PTR", basename(root_path[f]),".txt"))
jpeg(file = paste("Graph", basename(root_path[f]),".jpg"))
plot(y ~ x, pch = ".")
lines(l$fitted[1000:length(l$fitted)-1000] ~ x[1000:length(l$fitted)-1000], col = "red")
abline(h = Peak, v = x_peak, col = "blue")
abline(h = Trough, v = x_trough, col = "blue")
dev.off()
}
}
args <- process_arguments()
bam_into_PTR_and_graph_multiple_input(args$input, args$window_size, args$step_size, args$output)
toc()