/
AgentGen-v1.1.3.R
142 lines (124 loc) · 8.01 KB
/
AgentGen-v1.1.3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# AgentGen-v1.1.3.R is part of Food INdustry CoViD Control Tool
# (FInd CoV Control), version 1.1.3.
# Copyright (C) 2020-2021 Cornell University.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# create database of all agents and their attributes
#The default age_probabilities vector is based on finding the MLE 4-parameter
#beta distribution that best fits age category data on agricultural workers
AgentGen <- function (N, E0 = 1, IA0 = 0, IP0 = 0, IM0 = 0, initial_recovered = 0,
initial_V1 = 0, initial_V2 = 0,
age_probabilities = c(0.04, 0.26, 0.26, 0.21, 0.15, 0.07,
0.01, 0),
SEVERE_MULTIPLIER = 1) {
Age_Categories = c("10-19", "20-29", "30-39", "40-49", "50-59", "60-69",
"70-79", "80+")
##duration_E calculations
mu = 5.2
sd = .1
sdlog = sqrt(log(sd**2 / mu**2 + 1))
mulog = log(mu) - (sdlog ** 2) / 2
duration_IP = rgamma(N, shape=1.058, scale=2.174)
duration_E = pmax(rlnorm(N, mulog, sdlog) - duration_IP, 0) #Moghadas et al., 2020 & need for a non-negative duration
#Create a population of susceptibles
agents <- data.frame(ID = 1:N, #Unique ID number,
#Allows contact tracing (in a future version)
state = "S", #Initially susceptible
#Will be overwritten later in this
#function for some employees.
infector_ID = 0,
infector_state = '', #state of source of infection (if any)
mixing_propensity = 1, #leaving this in as a trivial
#property, for easier
#reintroduction later
#currently, it is unused
#The following are conceptualized as time *of*
#transition into the state in question, not time
#*since* transition. Thus, the value for a state that
#has not (yet) been reached (and hence, a transition
#that has not happened (yet) is infinity, not 0.
#This may in the future be changed to NA.
time_E = Inf,
time_IA = Inf,
time_IP = Inf,
time_IM = Inf,
time_IS = Inf,
time_IC = Inf,
time_R = Inf,
time_V1 = Inf,
time_V2 = Inf,
time_isolated = Inf, #setup for time in Isolation
#unlike the states listed above,
#this is not a mutually exclusive
#state, e.g., someone who can be
#both IM and isolated.
time_tested = -Inf, #Unlike most times, we want the "last time at which this person was tested," for someone who has never been tested, to be LESS than the last time at which someone was tested who has ever been tested; hence the use of -Inf instead of Inf.
isolated = FALSE, #initially
Age_Cat = sample(Age_Categories, N, replace = TRUE,
prob = age_probabilities),
duration_E = duration_E,
duration_IA = rgamma(N, shape=5, scale=1), # Moghadas et al., 2020
duration_IP = duration_IP, # Moghadas et al., 2020
duration_IM = rgamma(N, shape=16, scale=0.5), #Michelle based on Kerr et al
duration_IS = rgamma(N, shape=34.0278, scale=0.4114), #Michelle based on Kerr et al
duration_IC = rgamma(N, shape=34.0278, scale=0.4114), #Michelle based on Kerr et al
stringsAsFactors = FALSE ) #"stringsAsFactors = FALSE" is to allow other states later on
#pre-calculating these for clarity and ease of debugging
#as a bonus, the use of seq_len in this fashion means that guarding the
#operations with if statements is not necessary (to avoid the problem where
#R interprets "1:0" as "c(1,0)", rather than "numeric(0)").
index_E = seq_len(E0)
index_IA = E0 + seq_len(IA0)
index_IP = E0 + IA0 + seq_len(IP0)
index_IM = E0 + IA0 + IP0 + seq_len(IM0)
index_R = E0 + IA0 + IP0 + IM0 + seq_len(initial_recovered)
index_V2 = E0 + IA0 + IP0 + IM0 + initial_recovered + seq_len(initial_V2)
index_V1 = E0 + IA0 + IP0 + IM0 + initial_recovered + initial_V2 + seq_len(initial_V1)
agents$state[index_E]= "E"
agents$time_E[index_E]= -runif(E0, 0, agents$duration_E[index_E])
agents$state[index_IA] = "IA"
agents$time_IA[index_IA]= -runif(IA0, 0, agents$duration_IA[index_IA])
agents$state[index_IP]= "IP"
agents$time_IP[index_IP]= -runif(IP0, 0, agents$duration_IP[index_IP])
agents$state[index_IM]= "IM"
agents$time_IM[index_IM]= -runif(IM0, 0, agents$duration_IM[index_IM])
agents$state[index_R] = 'R'
agents$time_R[index_R]= -runif(initial_recovered, 0, 365) #adequate for now; may required revision when immune waning is added
agents$state[index_V1] = 'V1'
agents$time_V1[index_V1] = -runif(initial_V1, 0, 21) #not a perfect model of reality, but good enough
agents$state[index_V2] = 'V2'
agents$time_V2[index_V2] = -runif(initial_V2, 0, 21) #even worse, but adequate for now; may require revision when booster shots are added
agents$time_V1[index_V2] = agents$time_V2[index_V2] - 21 #again, not perfect, but doesn't actually matter (currently, and probably ever)
#generating transition times before the most recent
#maybe not necessary, but seems like a good guard against weird bugs
agents$time_IP[index_IM] = (agents$time_IM[index_IM] -
agents$duration_IP[index_IM])
agents$time_E[index_IP] = (agents$time_IP[index_IP] -
agents$duration_E[index_IP])
agents$time_E[index_IA] = (agents$time_IA[index_IA] -
agents$duration_E[index_IA])
#Import text file of disease progression probabilities
Probability_Matrix <- read.csv('Probability_Matrix.csv')
#Merge parameters for disease progression (Psymptomatic, Psevere, and Pcritical) from table into data frame
agents=merge.data.frame(agents, Probability_Matrix, by="Age_Cat", all.x=TRUE)
#pre-calculating, as with times
agents$symptomatic = rbinom(N, 1, agents$p_symptomatic) > 0
agents$severe = rbinom(N, 1, SEVERE_MULTIPLIER * agents$p_severe) > 0
agents$critical = rbinom(N, 1, agents$p_critical) > 0
agents$death = rbinom(N, 1, agents$p_death) > 0
agents = agents[sample(nrow(agents)),] # this is a reshuffled database, to randomize order of agents
# reminder: Age_Cat is first column
return(agents)
}