/
resampling_corr.R
84 lines (70 loc) · 3.51 KB
/
resampling_corr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Sanity check: We repeat random imputation of missing correlation between the two measures (done in data_completion.R for the main database), and recompute effect sizes 100 times to check it doesn't change the results.
#set up the dataframe that will contain the results after resampling (one line per resampling)
resamp_stats = base_model$reg_table
resamp_prob = base_model$reg_table$prob
for (s in 1:100){
data = DB2 #make a copy of the database without the imputed correlations
data$corr_imputed[is.na(data$corr)==T] = NA
data$corr_imputed[data$participant_design == "within_two"] <- impute(data$corr[data$participant_design == "within_two"],fun='random')
# recompute effect sizes
# empty the relevant variables
data$d_calc = NA
data$d_var_calc = NA
data$g_calc = NA
data$g_var_calc = NA
data$es_method = NA
#effect size calculation
for (i in 1:nrow(data)){
db = data[i,]
if (is.na(db$corr) | db$corr > .99 | db$corr < .01){
#if correlation between two measures is not reported, use an imputed correlation value
#we also account for the observation that some re-calculated values are impossible and replace those
corr <- db$corr_imputed
}else{corr <- db$corr}
if (complete(db$x_1, db$x_2, db$SD_1, db$SD_2)) {
pooled_SD <- sqrt((db$SD_1 ^ 2 + db$SD_2 ^ 2) / 2) # Lipsey & Wilson (2001)
d_calc <- (db$x_1 - db$x_2) / pooled_SD # Lipsey & Wilson (2001)
es_method <- "group_means_two"
} else if (complete(db$t)) {
wc <- sqrt(2 * (1 - corr))
d_calc <- (db$t / sqrt(db$n_1)) * wc #Dunlap et al., 1996, p.171
es_method <- "t_two"
} else if (complete(db$F)) {
wc <- sqrt(2 * (1 - corr))
d_calc <- sqrt(db$F / db$n_1) * wc
es_method <- "f_two"
} else {d_calc = NA}
#now that effect sizes are calculated, effect size variance is calculated
if (complete(db$n_1, d_calc)) {
d_var_calc <- (2 * (1 - corr)/ db$n_1) + (d_calc ^ 2 / (2 * db$n_1)) # Lipsey & Wilson (2001)
} else if (complete(db$d, db$d_var)) {
#if d and d_var were already reported, use those values
d_calc <- db$d
d_var_calc <- db$d_var
es_method <- "d_two"
} else {d_var_calc = NA}
df <- db$n_1 - 1
J <- 1 - 3 / (4 * (df - 1))
g_calc <- d_calc * J
g_var_calc <- J ^ 2 * d_var_calc
#add the results to the database
db$d_calc = d_calc
db$d_var_calc = d_var_calc
db$g_calc = g_calc
db$g_var_calc = g_var_calc
db$es_method = es_method
data[i,] = db
}
#Mark effect sizes more than 3 SD away from the mean effect (in both positive and negative directions) as outliers
data$outlier <- F #create the variable, and set as no by default (majority of cases hopefully!)
data$outlier[data$d_calc > mean(data$d_calc, na.rm = TRUE) + 3*sd(data$d_calc, na.rm = TRUE) | data$d_calc < mean(data$d_calc, na.rm = TRUE) - 3*sd(data$d_calc, na.rm = TRUE)]<-T
# Re-fit the meta-analytic model
#average variance / infant group to build models with CORR, and allow robumeta to detect correlated effect sizes within papers.
data = data %>% group_by(same_infant) %>% mutate(g_var_m = mean(g_var_calc))
base <- robu(g_calc ~ 1, data=data, modelweights = "CORR", studynum = same_infant, var.eff.size = g_var_m, small = T)
resamp_stats = bind_rows(resamp_stats,base$reg_table)
}
# check that the results don't change in 95 % of cases
resamp_alphas <- ifelse(resamp_stats$prob < 0.05, 1, 0)
resamp_alphas <- length(resamp_alphas[resamp_alphas==0])/length(resamp_alphas)
resamp_alphas