/
Homework06.R
136 lines (93 loc) · 4.22 KB
/
Homework06.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
## Using the version of gradient descent from lecture 10 and the log-
## likelihood function from lab, maximize the likelihood of the gamma dis-
## tribution on the cats' hearts. Start the optimization at the estimate you
## get from the method of moments.
gamma.loglike <- function(params) {
return(sum(dgamma(cats$Hwt, shape = params[1], scale = params[2], log = TRUE)))
}
gradient <- function(f, x, deriv.steps, ...) {
p <- length(x)
stopifnot(length(deriv.steps) == p)
f.old <- f(x, ...)
x.new <- matrix(rep(x, times = p), nrow = p) + diag(deriv.steps, nrow = p)
f.new <- apply(x.new, 2, f, ...)
gradient <- (f.new - f.old)/deriv.steps
return(gradient)
}
gradient.descent <- function(f, x, max.iterations, step.scale,
stopping.deriv, ...) {
for (iteration in 1:max.iterations) {
grad <- gradient(f, x, ...)
if(all(abs(grad) < stopping.deriv)) { break() }
x <- x - step.scale*grad
}
fit <- list(argmin = x, final.gradient = grad, final.value = f(x),
iterations = iteration)
return(fit)
}
## What command do you use to maximize the log-likelihood?
## What is the estimate? What is the log-likelihood there? The gradient?
fit.mle <- gradient.descent(f = gamma.loglike, x = cat.hearts.est, max.iterations=1e4,
step.scale = c(-1e-2, -1e-4), stopping.deriv = 1e-4, deriv.steps = c(1e-6,1e-6))
fit.mle$argmin
fit.mle$final.value
fit.mle$final.gradient
## We need standard errors for the estimated parameters. If the model
## is right, we can get standard errors for maximum likelihood estimates from
## the second derivatives of the log-likelihood. (The second derivatives tell
## us how sharp the maximum of the likelihood is.) Speciffcally:
## Var(Theta) = -H^(-1)(Theta), where H is the Hessian of the log-likelihood,
## its matrix of second par tial derivatives. (This is sometimes called the observed
## information matrix.)
## Install the package numDeriv. Its function hessian will calculate
## a numerical approximation to the Hessian of a given function at a
## given point. What is the Hessian matrix of the log-likelihood at the
## MLE?
install.packages("numDeriv")
library(numDeriv)
H <- hessian(gamma.loglike, fit.mle$argmin)
## What is the inverse of the Hessian matrix?
inverse.H <- solve(H)
## What standard errors does this imply for the shape and scale
## estimate? Hint: Remember how the standard error of an estimate
## relates to the variance of the estimator.
var.estimates <- -inverse.H
se.estimates <- sqrt(var.estimates)
## An alternative to using the Hessian is to use the jack-knife. This does
## not assume that the model is correct, but does mean we need to be able
## to re-compute the MLE after deleting points from the data set.
## Write a function, make.gamma.loglike, which takes in a data
## vector x and returns a log-likelihood function. Check that if x is
## cats$Hwt, then make.gamma.loglike returns a function which matches
## your old gamma.loglike at multiple parameter values.
make.gamma.loglike <- function(data) {
gamma.loglike <- function(params) {
return(sum(dgamma(data, shape = params[1], scale = params[2], log = TRUE)))
}
return(gamma.loglike)
}
## Write a function to calculate jack-knife standard errors for the
## MLE of the gamma distribution on an arbitrary data vector. The
## only argument should be the data vector. It should return a vec-
## tor of length two, giving the standard errors for the shape and the
## scale parameters. Your code should use make.gamma.loglike and
## gradient.descent, and may use a for loop, or sapply. Hint: look
## at the solutions to lab 4.
gamma.MLE.jackknife <- function(data) {
n <- length(data)
jack.ests <- matrix(0, n, ncol=2)
for (i in 1:n) {
moments <- gamma.est(data[-i])
loglike <- make.gamma.loglike(data[-i])
jack.ests[i,] <- gradient.descent(f = loglike , x = moments, max.iterations=1e4,
step.scale = c(-1e-2, -1e-4), stopping.deriv = 1e-4, deriv.steps = c(1e-6,1e-6))$argmin
}
jack.var <- ((n-1)^2/n)*apply(jack.ests, 2, var)
jack.se <- sqrt(jack.var)
return(jack.se)
}
## Are the maximum-likelihood and the method-of-moments estimates
## compatible with each other?
signif(fit.mle$argmin - cat.hearts.est, 3)
## What is the likelihood (not the log-likelihood) at the maximum?
signif(exp(fit.mle$final.value), 3)