/
sim dhs dat.R
137 lines (123 loc) · 5.28 KB
/
sim dhs dat.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
## Simulate DHS like data to be fit to make sure everything is working
## fine.
######################################################################
## Steve Bellan, February 2012
## sbellan@berkeley.edu / steve.bellan@gmail.com
######################################################################
## Set directory where output is to be written
if(getwd()=="/home/ubuntu/files") # if on cloud
{
setwd("~/files/")
}else{ # if on sb mac
setwd("~/Dropbox/disc mod backup/Couples Model Revision 120811/R scripts & Data Files/")
}
## load cleaned DHS data sets for all countries
load("alldhs.Rdata")
## load cleaned epidemic curves (w/ art-normalized prevalence for
## 15-49 by M & F
load("allepicm.Rdata")
load("allepicf.Rdata")
## load survival curve cs, monthly survival
load("csurv.Rdata")
######################################################################
## non western-africa indices are
######################################################################
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] "DRC" "Ethiopia" "Kenya" "Lesotho" "Malawi" "Rwanda" "Swaziland" "WA"
## [,9] [,10]
## [1,] "Zambia" "Zimbabwe"
######################################################################
## sample sizes
## DRC Ethiopia Kenya Lesotho Malawi Rwanda Swaziland WA
## 1199 1650 1620 1099 3045 1749 431 7915
## Zambia Zimbabwe
## 1598 1271
source("pcalc5.R")
if(!"group.ind" %in% ls()) group.ind <- 9
if(!"survive" %in% ls()) survive <- T
if(!"K.sim" %in% ls()) K.sim <- 1000
## Use this ds' couple & sex duration distributions in simulation.
group <- levels(dat$group)[group.ind]
ds <- levels(dat$ds)[grepl(group, levels(dat$ds))] # data set we're working on
dat <- dat[dat$ds %in% ds,] # only looking at that data set
print(paste("real",group,"serostatus"))
print(table(dat$ser))
odat <- dat[sample(1:nrow(dat), K.sim, replace = T),]
odat$ds <- paste(group, "simulated")
odat$group <- odat$ds
odat$ser <- NA
## Get before couple duration bd, where bd = max(mbd,fbd)+1
odat$bd <- apply(cbind(odat$tmar-odat$tms,odat$tmar-odat$tfs), 1, max) + 1
## Get couple duration cd
odat$cd <- odat$tint - odat$tmar
## Sexual partners per year of activity (s p acquisition rate: mspar, fspar)
odat$mspar <- odat$mlsp / (odat$tint - odat$tms + 1) * 12
odat$fspar <- odat$flsp / (odat$tint - odat$tfs + 1) * 12
if(!"survive" %in% ls()) survive <- T
## if(!"simpars" %in% ls()) # if sim pars is not already loaded (because normally sourcing from areldisc
## {
simpars <- c(bmb = .01, bfb = .02,
bme = .005, bfe=.005,
bmp=.015, lrho=0)
## }
load("pars.arr.Rdata") # load simpars from last fitted analysis for that country
if(!"heter" %in% ls()) heter <- F # default for individual heterogeneity in simulation
if(heter)
{
simpars[1:6] <- pars.arr[1:6,2, group.ind]
simpars[5] <- .9* simpars[5]
simpars[1:2] <- .5*simpars[1:2]
simpars[3:4] <- .7*simpars[3:4]
simpars[6] <- 0
}
K <- nrow(odat)
if(survive)
{
simout <- pcalc(simpars, dat = odat, trace = F, give.pis=T, heter = heter, lrho.sd = lrho.sd,
survive = survive, sim = T, browse=F)
}else{
simout <- pcalc(simpars, dat = odat, trace = F, give.pis=F, heter = heter, lrho.sd = lrho.sd,
survive = survive, sim = T, browse=F)
}
simdat <- simout$dat
simdat$ser <- NA
simdat$ser[simdat$cat.nm == "s..A"] <- 4
simdat$ser[simdat$cat.nm %in% c("mb.D", "me.D", "mb.A", "me.A")] <- 2
simdat$ser[simdat$cat.nm %in% c("f.bD", "f.eD", "f.bA", "f.eA")] <- 3
simdat$ser[simdat$cat.nm %in% c("hb1b2D", "hb2b1D", "hbeD","hebD", "hbpD", "hpbD", "hepD", "hpeD", "he1e2D", "he2e1D",
"hb1b2A", "hb2b1A", "hbeA","hebA", "hbpA", "hpbA", "hepA", "hpeA", "he1e2A", "he2e1A")] <- 1
simdat$alive <- T
simdat$alive[simdat$cat.nm %in% levels(simdat$cat.nm)[grepl("D",levels(simdat$cat.nm))]] <- F
allstates <- simout$allstates
## print(paste("simulated",group,"serostatus"))
## print(table(simdat$ser))
## print("dead couples")
## print(table(simdat$ser[!simdat$alive]))
## print("live couples")
## print(table(simdat$ser[simdat$alive]))
K <- nrow(simdat)
print("real dat serostatus breakdown")
print(round(xtabs(~ser,dat) / nrow(dat), 3))
print("simulated dat serostatus breakdown")
print(round(xtabs(~ser, simdat, subset = alive) / sum(simdat$alive), 3))
simdat <- data.frame(simdat, m.het = simout$m.het, f.het = simout$f.het)
save(simdat, file = "simulated dat.Rata")
save.image("workspace.Rdata")
simdat <- simdat[simdat$alive,]
cols <- c("yellow","green","purple","dark gray")
par(mar= rep(5,4))
plot(log(simdat$m.het), log(simdat$f.het), pch = 21, col = cols[simdat$ser], cex = 1, bty = "n",
xlab = "male log RF", ylab = "female log RF", lwd = 1.5)
## reload dhs data and add the columns necessry to bind it with the simulation
load("alldhs.Rdata")
dat$bd <- apply(cbind(dat$tmar-dat$tms,dat$tmar-dat$tfs), 1, max) + 1
dat$cd <- dat$tint - dat$tmar
dat$cat <- NA
dat$cat.nm <- NA
dat$alive <- NA
dat$mspar <- NA
dat$fspar <- NA
dat$m.het <- NA
dat$f.het <- NA
dat <- rbind(dat, simdat)
save(dat, file="alldhs plus sim.Rdata")