/
Homework04.R
187 lines (124 loc) · 5.91 KB
/
Homework04.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
## START WITH A POINT ESTIMATE. Write a function, gamma.est,
## which takes as input a vector of data values, and returns a vector
## containing the two estimated parameters of the gamma distribution,
## with components named shape and scale as appropriate.
gamma.est <- function(data) {
m <- mean(data)
v <- var(data)
scale <- v/m
shape <- m/scale
return(c(shape=shape,scale=scale))
}
## Verify that your function implements the appropriate formulas
## by showing that it matches the results in the lab solutions for all cats
## together, for the female cats, and for the male cats.
library(MASS)
data(cats)
gamma.est(cats$Hwt)
gamma.est(subset(cats, cats$Sex == "M")$Hwt)
gamma.est(subset(cats, cats$Sex == "F")$Hwt)
## DRAWS FROM THE GAMMA DISTRIBUTION. Generate a vector
## containing ten thousand random values from the gamma
## distribution with a = 19 and s = 0:56. What are the
## theoretical values of the mean and of the variance?
## What are their sample values?
random.gamma <- rgamma(n = 10000, shape = 19, scale = 0.56)
mean(random.gamma)
var(random.gamma)
# The theoretical mean is a*s, the theoretical variance is a*s^2.
## Plot the histogram of the random values, and add the curve of
## the theoretical probability density function.
hist(random.gamma, probability = TRUE, n = 31)
curve(dgamma(x, shape = 19, scale = 0.56), add = TRUE)
## Apply your gamma.est function to your random sample. Report
## the estimated parameters and how far they are from the true values.
gamma.est(random.gamma)
## TOP-LEVEL FUNCTION. Write a function, gamma.est.se, to calculate the
## standard error of your estimates of the gamma parameters, on simulated
## data drawn from the gamma distribution. It should take the following
## arguments: true shape parameter shape (or a), true scale parameter scale
## (or s), size of each sample n, and number of repetitions at that sample size
## B. It should return two standard errors, one for the shape parameter a and
## one for the scale parameter s. (These can be either in a vector or in a list,
## but should be named clearly.) It should call a function gamma.est.rep
## which takes the same arguments as gamma.est.se, and returns an array
## with two rows and B columns, one row holding shape estimates and the
## other row scale estimates. Your gamma.est.se function should not, itself,
## estimate any parameters or generate any random values.
gamma.est.se <- function (shape, scale, n, B) {
se.shape <- sd(gamma.est.rep(shape, scale, n, B)[1,])
se.scale <- sd(gamma.est.rep(shape, scale, n, B)[2,])
return(c(se.shape, se.scale))
}
## TESTING WITH A STUB. To check that gamma.est.se works properly,
## we write a stub or dummy version of gamma.est.rep, which takes the
## correct arguments and returns an array of the proper size, but whose
## entries are fixed so that it's easy for us to calculate what gamma.est.se
## ought to do.
## Write gamma.est.rep so that the entries in the first row of the
## returned array alternate between shape and shape+1, and those in
## the second row alternate between scale and scale+n. It should
## match the following, except for the row names, which are optional.
# gamma.est.rep(2,1,10,10)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# shapes 2 3 2 3 2 3 2 3 2 3
# scales 1 11 1 11 1 11 1 11 1 11
# gamma.est.rep(2,8,5,7)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# shapes 2 3 2 3 2 3 2
# scales 8 13 8 13 8 13 8
gamma.est.rep <- function(shape, scale, n, B) {
res.shape <- c()
res.scale <- c()
for (i in 1:B) {
if (i%%2 != 0) {
res.shape <- c(res.shape, shape)
res.scale <- c(res.scale, scale)
}
else {
res.shape <- c(res.shape, shape + 1)
res.scale <- c(res.scale, scale + n)
}
}
return(rbind(res.shape, res.scale))
}
## Calculate the standard deviations of each row in the two arrays
## above.
apply(gamma.est.rep(2,1,10,10), 1, sd)
apply(gamma.est.rep(2,8,5,7), 1, sd)
## Run your gamma.est.se, with this version of gamma.est.rep.
## Do its standard errors match the standard deviations you just calcu-
## lated? Should they?
gamma.est.se(2,1,10,10) # Yes.
gamma.est.se(2,8,5,7) # Yes.
## REPLACING THE STUB. Write the actual gamma.est.rep. Each of the B columns in its
## output should be the result of applying gamma.est to a vector of n
## random numbers generated by a different call to rgamma, all with the
## same shape and scale parameters. For full credit, use replicate
## rather than looping. Hint: Look at lecture 3, towards the end.
gamma.est.rep <- function(shape,scale,n,B) {
return(replicate(B, gamma.est(rgamma(n, shape = shape, scale = scale))))
}
## Run gamma.est.se, calling your new gamma.est.rep, with shape=2,
## scale=1, n=10 and B=1e5. Check that the standard error for shape
## is approximately 1.6 and that for scale approximately 0.54. Explain
## why this problem cannot give exact control values.
gamma.est.se(shape = 2, scale = 1, n = 10, B = 1e5)
## USING THE STANDARD ERRORS. Run gamma.est.se with the shape, scale, and sample size (n)
## parameters appropriate to the female cats, and B=1e5. Report both
## standard errors.
estimates.F <- gamma.est(subset(cats, cats$Sex == "F")$Hwt)
se.F <- gamma.est.se(shape = estimates.F["shape"], scale = estimates.F["scale"], n = length(subset(cats, cats$Sex == "F")$Hwt), B = 1e5)
se.F
## Repeat the previous problem with the male cats.
estimates.M <- gamma.est(subset(cats, cats$Sex == "M")$Hwt)
se.M <- gamma.est.se(shape = estimates.M["shape"], scale = estimates.M["scale"], n = length(subset(cats, cats$Sex == "M")$Hwt), B = 1e5)
se.M
## The standard error of a difference, d1-d2, is sqrt(se1^2 + se2^2).
## Calculate the standard errors for the two differences in parameter estimates
## between the two sexes of cat, and the ratio of the differences to their
## standard errors.
se.diff.shape <- sqrt(se.F[1]^2 + se.M[1]^2)
se.diff.scale <- sqrt(se.F[2]^2 + se.M[2]^2)
ratio.shape <- (se.F[1] - se.M[1])/se.diff.shape
ratio.scale <- (se.F[2] - se.M[2])/se.diff.scale