Homework04.R

## START WITH A POINT ESTIMATE. Write a function, gamma.est, 
## which takes as input a vector of data values, and returns a vector
## containing the two estimated parameters of the gamma distribution,
## with components named shape and scale as appropriate.

gamma.est <- function(data) {
  m <- mean(data)
  v <- var(data)
  scale <- v/m
  shape <- m/scale
  return(c(shape=shape,scale=scale))
}


## Verify that your function implements the appropriate formulas
## by showing that it matches the results in the lab solutions for all cats
## together, for the female cats, and for the male cats.

library(MASS)
data(cats)

gamma.est(cats$Hwt)
gamma.est(subset(cats, cats$Sex == "M")$Hwt)
gamma.est(subset(cats, cats$Sex == "F")$Hwt)


## DRAWS FROM THE GAMMA DISTRIBUTION. Generate a vector
## containing ten thousand random values from the gamma 
## distribution with a = 19 and s = 0:56. What are the
## theoretical values of the mean and of the variance? 
## What are their sample values?

random.gamma <- rgamma(n = 10000, shape = 19, scale = 0.56)
mean(random.gamma)
var(random.gamma)

# The theoretical mean is a*s, the theoretical variance is a*s^2.


## Plot the histogram of the random values, and add the curve of
## the theoretical probability density function.

hist(random.gamma, probability = TRUE, n = 31)
curve(dgamma(x, shape = 19, scale = 0.56), add = TRUE)


## Apply your gamma.est function to your random sample. Report
## the estimated parameters and how far they are from the true values.

gamma.est(random.gamma)


## TOP-LEVEL FUNCTION. Write a function, gamma.est.se, to calculate the
## standard error of your estimates of the gamma parameters, on simulated
## data drawn from the gamma distribution. It should take the following
## arguments: true shape parameter shape (or a), true scale parameter scale
## (or s), size of each sample n, and number of repetitions at that sample size
## B. It should return two standard errors, one for the shape parameter a and
## one for the scale parameter s. (These can be either in a vector or in a list,
## but should be named clearly.) It should call a function gamma.est.rep
## which takes the same arguments as gamma.est.se, and returns an array
## with two rows and B columns, one row holding shape estimates and the
## other row scale estimates. Your gamma.est.se function should not, itself,
## estimate any parameters or generate any random values.

gamma.est.se <- function (shape, scale, n, B) {
	se.shape <- sd(gamma.est.rep(shape, scale, n, B)[1,])
	se.scale <- sd(gamma.est.rep(shape, scale, n, B)[2,])
	return(c(se.shape, se.scale))
}


## TESTING WITH A STUB. To check that gamma.est.se works properly,
## we write a stub or dummy version of gamma.est.rep, which takes the
## correct arguments and returns an array of the proper size, but whose
## entries are fixed so that it's easy for us to calculate what gamma.est.se
## ought to do.

## Write gamma.est.rep so that the entries in the first row of the
## returned array alternate between shape and shape+1, and those in
## the second row alternate between scale and scale+n. It should
## match the following, except for the row names, which are optional.

# gamma.est.rep(2,1,10,10)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# shapes 2 3 2 3 2 3 2 3 2 3
# scales 1 11 1 11 1 11 1 11 1 11

# gamma.est.rep(2,8,5,7)
# [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# shapes 2 3 2 3 2 3 2
# scales 8 13 8 13 8 13 8

gamma.est.rep <- function(shape, scale, n, B) {
	res.shape <- c()
	res.scale <- c()
	for (i in 1:B) {
		if (i%%2 != 0) { 
			res.shape <- c(res.shape, shape)
			res.scale <- c(res.scale, scale)
		}
		else {
			res.shape <- c(res.shape, shape + 1)
			res.scale <- c(res.scale, scale + n)
		}
	}
	return(rbind(res.shape, res.scale))
}


## Calculate the standard deviations of each row in the two arrays
## above.

apply(gamma.est.rep(2,1,10,10), 1, sd)
apply(gamma.est.rep(2,8,5,7), 1, sd)


## Run your gamma.est.se, with this version of gamma.est.rep.
## Do its standard errors match the standard deviations you just calcu-
## lated? Should they?

gamma.est.se(2,1,10,10)		# Yes.
gamma.est.se(2,8,5,7)		# Yes.


## REPLACING THE STUB. Write the actual gamma.est.rep. Each of the B columns in its
## output should be the result of applying gamma.est to a vector of n
## random numbers generated by a different call to rgamma, all with the
## same shape and scale parameters. For full credit, use replicate
## rather than looping. Hint: Look at lecture 3, towards the end.

gamma.est.rep <- function(shape,scale,n,B) {
	return(replicate(B, gamma.est(rgamma(n, shape = shape, scale = scale))))
}


## Run gamma.est.se, calling your new gamma.est.rep, with shape=2,
## scale=1, n=10 and B=1e5. Check that the standard error for shape
## is approximately 1.6 and that for scale approximately 0.54. Explain
## why this problem cannot give exact control values.

gamma.est.se(shape = 2, scale = 1, n = 10, B = 1e5)


## USING THE STANDARD ERRORS. Run gamma.est.se with the shape, scale, and sample size (n)
## parameters appropriate to the female cats, and B=1e5. Report both
## standard errors.

estimates.F <- gamma.est(subset(cats, cats$Sex == "F")$Hwt)
se.F <- gamma.est.se(shape = estimates.F["shape"], scale = estimates.F["scale"], n = length(subset(cats, cats$Sex == "F")$Hwt), B = 1e5)
se.F


## Repeat the previous problem with the male cats.
estimates.M <- gamma.est(subset(cats, cats$Sex == "M")$Hwt)
se.M <- gamma.est.se(shape = estimates.M["shape"], scale = estimates.M["scale"], n = length(subset(cats, cats$Sex == "M")$Hwt), B = 1e5)
se.M


## The standard error of a difference, d1-d2, is sqrt(se1^2 + se2^2). 
## Calculate the standard errors for the two differences in parameter estimates
## between the two sexes of cat, and the ratio of the differences to their
## standard errors.

se.diff.shape <- sqrt(se.F[1]^2 + se.M[1]^2)
se.diff.scale <- sqrt(se.F[2]^2 + se.M[2]^2)

ratio.shape <- (se.F[1] - se.M[1])/se.diff.shape
ratio.scale <- (se.F[2] - se.M[2])/se.diff.scale