-
Notifications
You must be signed in to change notification settings - Fork 28
/
survey_mean.Rd
144 lines (120 loc) · 5.12 KB
/
survey_mean.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/survey_statistics.r
\name{survey_mean}
\alias{survey_mean}
\alias{survey_prop}
\title{Calculate mean/proportion and its variation using survey methods}
\usage{
survey_mean(
x,
na.rm = FALSE,
vartype = c("se", "ci", "var", "cv"),
level = 0.95,
proportion = FALSE,
prop_method = c("logit", "likelihood", "asin", "beta", "mean", "xlogit"),
deff = FALSE,
df = NULL,
...
)
survey_prop(
vartype = c("se", "ci", "var", "cv"),
level = 0.95,
proportion = TRUE,
prop_method = c("logit", "likelihood", "asin", "beta", "mean", "xlogit"),
deff = FALSE,
df = NULL,
...
)
}
\arguments{
\item{x}{A variable or expression, or empty}
\item{na.rm}{A logical value to indicate whether missing values should be dropped.
See the section "Missing Values" later in this help page.}
\item{vartype}{Report variability as one or more of: standard error ("se", default),
confidence interval ("ci"), variance ("var") or coefficient of variation
("cv").}
\item{level}{(For vartype = "ci" only) A single number or vector of numbers indicating
the confidence level}
\item{proportion}{Use methods to calculate the proportion that may have more accurate
confidence intervals near 0 and 1. Based on
\code{\link[survey]{svyciprop}}.}
\item{prop_method}{Type of proportion method to use if proportion is \code{TRUE}. See
\code{\link[survey]{svyciprop}} for details.}
\item{deff}{A logical value to indicate whether the design effect should be returned.}
\item{df}{(For vartype = "ci" only) A numeric value indicating the degrees of freedom
for the t-distribution. The default (NULL) uses \code{\link[survey]{degf}},
but Inf is the usual survey package's default (except in
\code{\link[survey]{svyciprop}}).}
\item{...}{Ignored}
}
\description{
Calculate means and proportions from complex survey data.
\code{survey_mean} with \code{proportion = FALSE} (the default) or \code{survey_prop} with \code{proportion = FALSE}
is a wrapper around \code{\link[survey]{svymean}}.
\code{survey_prop} with \code{proportion = TRUE} (the default) or \code{survey_mean} with \code{proportion = TRUE}
is a wrapper around \code{\link[survey]{svyciprop}}.
\code{survey_mean} and \code{survey_prop} should always be called from \code{\link{summarise}}.
}
\details{
Using \code{survey_prop} is equivalent to leaving out the \code{x} argument in
\code{survey_mean} and setting \code{proportion = TRUE}. This calculates the
proportion represented within the data, with the last grouping variable
"unpeeled". \code{\link{interact}} allows for "unpeeling" multiple variables at once.
}
\section{Missing Values}{
When calculating proportions for a grouping variable \code{x}, \code{NA} values
will affect the estimated proportions unless they are first removed by calling
\code{filter(!is.na(x))}.
When calculating means for a numeric variable, equivalent results are obtained
by calling \code{filter(!is.na(x))} or using \code{survey_mean(x, na.rm = TRUE)}.
However, it is better to use \code{survey_mean(x, na.rm = TRUE)} if
you are simultaneously producing summaries for other variables
that might not have missing values for the same rows as \code{x}.
}
\examples{
data(api, package = "survey")
dstrata <- apistrat \%>\%
as_survey_design(strata = stype, weights = pw)
dstrata \%>\%
summarise(api99_mn = survey_mean(api99),
api_diff = survey_mean(api00 - api99, vartype = c("ci", "cv")))
dstrata \%>\%
group_by(awards) \%>\%
summarise(api00 = survey_mean(api00))
# Use `survey_prop` to calculate the proportion in each group
dstrata \%>\%
group_by(awards) \%>\%
summarise(pct = survey_prop())
# Or you can also leave out `x` in `survey_mean`, so this is equivalent
dstrata \%>\%
group_by(awards) \%>\%
summarise(pct = survey_mean())
# When there's more than one group, the last grouping variable is "peeled" off and
# proportions are calculated within each combination of the remaining grouping
# variables, each adding up to 100\%.
# So in this example, the sum of prop is 300\% (100\% for each of the three
# levels of stype)
dstrata \%>\%
group_by(stype, awards) \%>\%
summarize(prop = survey_prop())
# The `interact` function can help you calculate the proportion over
# the interaction of two or more variables
# So in this example, the sum of prop is 100\%
dstrata \%>\%
group_by(interact(stype, awards)) \%>\%
summarize(prop = survey_prop())
# Setting proportion = TRUE uses a different method for calculating confidence intervals
dstrata \%>\%
summarise(high_api = survey_mean(api00 > 875, proportion = TRUE, vartype = "ci"))
# level takes a vector for multiple levels of confidence intervals
dstrata \%>\%
summarise(api99 = survey_mean(api99, vartype = "ci", level = c(0.95, 0.65)))
# Note that the default degrees of freedom in srvyr is different from
# survey, so your confidence intervals might not be exact matches. To
# replicate survey's behavior, use df = Inf
dstrata \%>\%
summarise(srvyr_default = survey_mean(api99, vartype = "ci"),
survey_default = survey_mean(api99, vartype = "ci", df = Inf))
comparison <- survey::svymean(~api99, dstrata)
confint(comparison) # survey's default
confint(comparison, df = survey::degf(dstrata)) # srvyr's default
}