/
Mariam_et_al_ACCORD_C4_PS_application.Rmd
143 lines (116 loc) · 4.27 KB
/
Mariam_et_al_ACCORD_C4_PS_application.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
---
title: "Prediction of individuals benefitting from intensive glycemia control"
author: "Rotroff Lab"
date: "`r Sys.Date()`"
output:
md_document:
variant: markdown_github
---
# Libraries
```{r}
library(bigstatsr)
library(data.table)
library(bigsnpr)
library(tidyverse)
theme_set(theme_bw())
```
# Load genetic data
`.bed` files were created by using plink with the following parameters:
* MAF: 0.03
* geno: 0.1
## Convert plink files to `.rds` object
This code will create a `.rds` object from the plink files and only needs to be run once. An error will be thrown if the given directory containing (`sample_dir` in this example) does not contain other associated plink files e.g. `.fam` as well.
```{r}
dir<-"sample_dir/sample.bed"
bed<-snp_readBed(dir) #creates an .rds file- neceassry for the following steps.
```
## Attach and impute genetic data
`.rds` file from the previous step will be attached for further analyses.
```{r}
dir<-"sample_dir/sample.rds" #rds file generated from the previous code chunk
obj.bigSNPall <- snp_attach(dir)
G<-obj.bigSNPall$genotypes
G.imp<-snp_fastImputeSimple(G)
y<-obj.bigSNPall$fam$affection-1
big_counts(G.imp,ind.col = 1:10)
```
# Load Models
## SCT-PS
```{r}
# load sct-ps betas
all_snps<-fread("Mariam_et_al_ACCORD_C4_SCT-PS.csv",data.table = F,
skip=1)
sumstats<-all_snps[,c('chr', 'rsid', 'pos', 'a0', 'a1', 'beta', 'p')]
sumstats<-sumstats[which(sumstats$beta!=0),]
map <- obj.bigSNPall$map[,-(2:3)]
names(map) <- c("chr", "pos", "a0", "a1")
info_snp <- snp_match(sumstats, map)
x<-obj.bigSNPall$map$marker.ID[which(obj.bigSNPall$map$marker.ID %in% info_snp$rsid)]
info_sct_ps<-info_snp[order(match(info_snp$rsid,x)),]
# load sct-ps model
final_model<-read_rds("Mariam_et_al_ACCORD_C4_SCT-PS.rds")
summary(final_model$mod)
```
## CT-PS Model
```{r}
# load ct-ps betas
all_snps<-fread("Mariam_et_al_ACCORD_C4_CT-PS.csv",data.table = F,
skip = 1)
sumstats<-all_snps[,c('chr', 'rsid', 'pos', 'a0', 'a1', 'beta', 'p')]
sumstats$beta<-as.numeric(sumstats$beta)
map <- obj.bigSNPall$map[,-(2:3)]
names(map) <- c("chr", "pos", "a0", "a1")
info_snp <- snp_match(sumstats, map)
x<-obj.bigSNPall$map$marker.ID[which(obj.bigSNPall$map$marker.ID %in% info_snp$rsid)]
info_ct_ps<-info_snp[order(match(info_snp$rsid,x)),]
```
# Apply PS
## SCT-PS
1. SNP data is subset down to contain only SNPs involved in SCT-PS.
2. SCT-PS scores are calculated.
3. A pre-specified threshold is used to call individuals predicted to benefit from intensive treatment.
```{r}
G.imp.sub<-big_copy(G.imp,
ind.row = 1:nrow(G.imp),
ind.col=which(obj.bigSNPall$map$marker.ID %in% info_sct_ps$rsid))
CHR.sub <- obj.bigSNPall$map$chromosome[which(obj.bigSNPall$map$marker.ID %in% info_sct_ps$rsid)]
POS.sub <- obj.bigSNPall$map$physical.pos[which(obj.bigSNPall$map$marker.ID %in% info_sct_ps$rsid)]
beta <- (info_sct_ps$beta)
lpval <- -log10(info_sct_ps$p)
threshold<- -0.6060066
pred<-final_model$intercept+
big_prodVec(G.imp.sub,beta)
preds.sct_ps <- data.frame(cbind(obj.bigSNPall$fam$family.ID,pred,
ifelse(pred>threshold, 1, 0)))
colnames(preds.sct_ps)<-c("ID","sctPS","prediction")
table(preds.sct_ps[,3])
```
## CT-PS
1. SNP data is subset down to contain only SNPs involved in CT-PS.
2. CT-PS scores are calculated.
3. A pre-specified threshold is used to call individuals predicted to benefit from intensive treatment.
```{r}
G.imp.sub<-big_copy(G.imp,
ind.row = 1:nrow(G.imp),
ind.col=which(obj.bigSNPall$map$marker.ID %in% info_ct_ps$rsid))
CHR.sub <- obj.bigSNPall$map$chromosome[which(obj.bigSNPall$map$marker.ID %in% info_ct_ps$rsid)]
POS.sub <- obj.bigSNPall$map$physical.pos[which(obj.bigSNPall$map$marker.ID %in% info_ct_ps$rsid)]
beta <- (info_ct_ps$beta)
lpval <- -log10(info_ct_ps$p)
threshold<- 5463.685
ct_ps<-snp_PRS(G.imp.sub, beta,
lpS.keep = lpval, thr.list = 1.073004)
preds.ct_ps <- data.frame(cbind(obj.bigSNPall$fam$family.ID,ct_ps,
ifelse(ct_ps>threshold, 1, 0)))
colnames(preds.ct_ps)<-c("ID","ctPS","prediction")
table(preds.ct_ps[,3])
```
# Save Output
```{r}
write.csv(preds.sct_ps,"sct_ps_preds.csv",row.names = F)
write.csv(preds.ct_ps,"ct_ps_preds.csv",row.names = F)
```
# SessionInfo
```{r}
sessionInfo()
```