-
Notifications
You must be signed in to change notification settings - Fork 2
/
booteffectsize.m
403 lines (366 loc) · 13.3 KB
/
booteffectsize.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
function [d,ci,stats,dist] = booteffectsize(x,m,varargin)
%BOOTEFFECTSIZE Effect size with bootstrapped confidence intervals.
% D = BOOTEFFECTSIZE(X) returns the effect size measure for a single
% sample X based on Cohen's d. By default, Cohen's d is bias-corrected
% for sample size, also known as Hedges' g. For ordinal data, Cliff's
% delta can be computed by setting the 'effect' parameter to 'cliff'.
%
% If X is a matrix, separate effect sizes are measured along each column
% of X, and a vector of results is returned. If the 'compare' parameter
% is set to 'pairwise', the effect sizes between every pair of columns in
% X are measured, and a matrix of results is returned.
%
% BOOTEFFECTSIZE treats NaNs as missing values, and ignores them.
%
% D = BOOTEFFECTSIZE(X,M) returns the effect size measure for a single
% sample X with a known mean M. M must be a scalar.
%
% D = BOOTEFFECTSIZE(X,Y) returns the effect size between two dependent
% samples X and Y using the pooled standard deviation. X and Y can be
% treated as independent samples by setting the 'paired' parameter to 0.
% If X and Y are independent samples with significantly different
% variances, an estimate based on the control sample's variance (Glass'
% delta) can be computed by setting the 'effect' parameter to 'glass'.
% For this, the control sample should be entered as X, and the test
% sample as Y.
%
% [D,CI] = BOOTEFFECTSIZE(...) returns the bootstrapped, bias-corrected
% confidence intervals using the percentile method.
%
% [D,CI,STATS] = BOOTEFFECTSIZE(...) returns a structure with the
% following fields:
% 'df' -- the degrees of freedom of each measure
% 'sd' -- the pooled standard deviation, or of X for a one-
% sample or Glass' delta measure
%
% [D,CI,STATS,DIST] = BOOTEFFECTSIZE(...) returns the bootstrapped
% sampling distribution of the effect size measure.
%
% [...] = BOOTEFFECTSIZE(...,'PARAM1',VAL1,'PARAM2',VAL2,...) specifies
% additional parameters and their values. Valid parameters are the
% following:
%
% Parameter Value
% 'alpha' A scalar between 0 and 1 specifying the confidence
% level as 100*(1-ALPHA)% (default=0.05 for 95%
% confidence).
% 'dim' A scalar specifying the dimension to work along: pass
% in 1 to work along the columns (default), or 2 to work
% along the rows. Applies to both X and Y.
% 'paired' A numeric scalar (0,1) or logical indicating whether
% the data in X and Y are paired: pass in 1 for paired
% samples (default), or 0 for unpaired samples.
% 'effect' A string specifying the effect size to measure:
% 'cohen' standardised mean difference based on
% Cohen's d (default)
% 'glass' standardised mean difference based on
% Glass' delta for comparing independent
% samples with significantly different
% variances
% 'cliff' unstandardised mean difference based
% on Cliff's delta for comparing ordinal
% data
% 'meandiff' unstandardised mean difference
% 'mediandiff' unstandardised median difference
% 'vartype' A string specifying the variance equivalence of X and Y
% to determine the SD and degrees of freedom:
% 'equal' assume equal variances (default)
% 'unequal' assume unequal variances
% 'compare' A string specifying what to compare each variable to
% when only X is entered:
% 'zero' compare each column of X to zero and
% return a vector of results (default)
% 'pairwise' compare every pair of columns in X to
% each other and return a matrix of
% results
% 'nboot' A scalar specifying the number of bootstraps used to
% estimate the confidence intervals (default=10,000).
% 'correct' A numeric scalar (0,1) or logical indicating whether to
% bias-correct the effect size and confidence intervals
% according to sample size (default=true). Note, this
% only applies to standardised effect size measures.
% 'rows' A string specifying the rows to use in the case of any
% missing values (NaNs):
% 'all' use all rows, even with NaNs (default)
% 'complete' use only rows with no NaNs
% 'seed' A scalar integer specifying the seed used to initialise
% the bootstrap generator. By default, the generator is
% initialised based on the current time, resulting in a
% different bootstrap each time.
%
% See also MEANEFFECTSIZE BOOTCI PERMUTTEST PERMUTTEST2 PERMUVARTEST2.
%
% PERMUTOOLS https://github.com/mickcrosse/PERMUTOOLS
% References:
% [1] Crosse MJ, Foxe JJ, Molholm S (2024) PERMUTOOLS: A MATLAB
% Package for Multivariate Permutation Testing. arXiv 2401.09401.
% [2] Hentschke H, Stuttgen MC (2011) Computation of measures of
% effect size for neuroscience data sets. Eur J Neurosci,
% 34:1887–1894.
% [3] Cohen J (1969) Statistical power for the behavioural sciences.
% London: Academic Press.
% [4] Hedges LV, Olkin I (1985) Statistical methods for meta-
% analysis. San Diego, CA: Academic Press.
% © 2018-2023 Mick Crosse <crossemj@tcd.ie>
% CNL, Albert Einstein College of Medicine, NY.
% TCBE, Trinity College Dublin, Ireland.
if nargin < 2 || isempty(m)
y = [];
elseif isscalar(m)
y = [];
x = x-m;
elseif ismatrix(m)
y = m;
end
% Parse input arguments
arg = ptparsevarargin(varargin);
% Validate input parameters
ptvalidateparamin(x,y,arg)
% Orient data column-wise
if arg.dim==2 || isrow(x)
x = x';
end
if ~isempty(y) && (arg.dim==2 || isrow(y))
y = y';
end
% Set up comparison
if isempty(y)
switch arg.compare
case 'zero'
y = zeros(size(x));
case 'pairwise'
warning('Comparing all columns of X...')
[x,y] = ptpaircols(x);
arg.mat = true;
end
else
switch arg.compare
case 'pairwise'
error('The PAIRWISE option can only be used with one sample.')
end
end
if size(x,2)~=size(y,2)
error('X and Y must have the same number of variables.')
end
% Get data dimensions, ignoring NaNs
nvar = size(x,2);
nobsx = sum(~isnan(x));
nobsy = sum(~isnan(y));
% Compute degrees of freedom
dfx = nobsx-1;
dfy = nobsy-1;
% For efficiency, only omit NaNs if necessary
if any(isnan(x(:))) || any(isnan(y(:)))
nanflag = 'omitmissing';
else
nanflag = 'includemissing';
end
% Compute sample variance using fast algo
smx = sum(x,nanflag);
smy = sum(y,nanflag);
varx = (sum(x.^2,nanflag)-(smx.^2)./nobsx)./dfx;
vary = (sum(y.^2,nanflag)-(smy.^2)./nobsy)./dfy;
if arg.paired
% Check input parameters
switch arg.effect
case 'glass'
error('GLASS can only be used for independent samples.')
end
switch arg.vartype
case 'unequal'
error('Cannot assume unequal variance for dependent samples.')
end
% Compute difference between samples
switch arg.effect
case 'cliff'
diffxy = zeros(max(nobsx)^2,nvar);
for i = 1:nvar
diffi = sign(x(:,i)-y(:,i)');
diffxy(:,i) = diffi(:);
end
diffxy(1:max(nobsx)+1:end,:) = 0;
otherwise
diffxy = x-y;
end
% Use only rows with no NaN values if specified
switch arg.rows
case 'complete'
diffxy = diffxy(~any(isnan(diffxy),2),:);
end
% Get data dimensions, ignoring NaNs
switch arg.effect
case 'cliff'
nobs = nobsx.*dfx;
otherwise
nobs = sum(~isnan(diffxy));
end
% Compute mean difference
switch arg.effect
case 'mediandiff'
mu = median(diffxy,nanflag);
otherwise
mu = sum(diffxy,nanflag)./nobs;
end
% Compute standard deviation
df = nobs-1;
sd = sqrt((varx+vary)/2);
else
% Use only rows with no NaN values if specified
switch arg.rows
case 'complete'
x = x(~any(isnan(x),2),:);
y = y(~any(isnan(y),2),:);
end
% Compute difference between samples
switch arg.effect
case 'cliff'
diffxy = zeros(max(nobsx)*max(nobsy),nvar);
for i = 1:nvar
diffi = sign(x(:,i)-y(:,i)');
diffxy(:,i) = diffi(:);
end
otherwise
diffxy = [x;y];
end
% Get data dimensions, ignoring NaNs
nobs = sum(~isnan(diffxy));
% Compute mean difference
switch arg.effect
case 'cliff'
mu = sum(diffxy,nanflag)./nobs;
case 'mediandiff'
mu = median(x,nanflag)-median(y,nanflag);
otherwise
mu = smx./nobsx-smy./nobsy;
end
% Compute standard deviation
switch arg.effect
case 'glass'
switch arg.vartype
case 'equal'
warning(['GLASS option should only be used for '...
'samples with unequal variances.'])
end
df = dfx;
sd = sqrt(varx);
otherwise
switch arg.vartype
case 'equal'
df = nobs-2;
sd = sqrt((dfx.*varx+dfy.*vary)./df);
case 'unequal'
df = (dfx.*dfy.*(varx+vary).^2)./(dfy.*varx.^2+...
dfx.*vary.^2);
sd = sqrt((varx+vary)/2);
end
end
end
% Compute effect size
switch arg.effect
case {'cliff','meandiff','mediandiff'}
d = mu;
otherwise
d = mu./sd;
end
if nargout > 1
rng(arg.seed);
ci = zeros(2,nvar);
for i = 1:nvar
% Generate random bootstraps
xb = x(:,i);
yb = y(:,i);
idx = ceil(nobsx(i)*rand([nobsx(i),arg.nboot]));
xb = xb(idx);
if ~arg.paired
idx = ceil(nobsy(i)*rand([nobsy(i),arg.nboot]));
end
yb = yb(idx);
% Estimate sampling distribution
smxb = sum(xb,nanflag);
smyb = sum(yb,nanflag);
varxb = (sum(xb.^2,nanflag)-(smxb.^2)/nobsx(i))/dfx(i);
varyb = (sum(yb.^2,nanflag)-(smyb.^2)/nobsy(i))/dfy(i);
if arg.paired
switch arg.effect
case 'cliff'
diffxyb = zeros(max(nobsx)^2,arg.nboot);
for j = 1:arg.nboot
diffi = sign(xb(:,j)-yb(:,j)');
diffxyb(:,j) = diffi(:);
end
diffxyb(1:max(nobsx)+1:end,:) = 0;
otherwise
diffxyb = xb-yb;
end
switch arg.effect
case 'mediandiff'
mub = median(diffxyb,nanflag);
otherwise
mub = sum(diffxyb,nanflag)/nobs(i);
end
sdb = sqrt((varxb+varyb)/2);
else
switch arg.effect
case 'cliff'
diffxyb = zeros(max(nobsx)*max(nobsy),arg.nboot);
for j = 1:arg.nboot
diffi = sign(xb(:,j)-yb(:,j)');
diffxyb(:,j) = diffi(:);
end
end
switch arg.effect
case 'cliff'
mub = sum(diffxyb,nanflag)/nobs(i);
case 'mediandiff'
mub = median(xb,nanflag)-median(yb,nanflag);
otherwise
mub = smxb/nobsx(i)-smyb/nobsy(i);
end
switch arg.effect
case 'glass'
sdb = sqrt(varxb);
case 'cohen'
switch arg.vartype
case 'equal'
sdb = sqrt((dfx(i)*varxb+dfy(i)*varyb)/df(i));
case 'unequal'
sdb = sqrt((varxb+varyb)/2);
end
end
end
switch arg.effect
case {'cliff','meandiff','mediandiff'}
dist = mub;
otherwise
dist = mub./sdb;
end
% Compute CI
ci(:,i) = prctile(dist,100*[arg.alpha/2;1-arg.alpha/2]);
end
end
% Bias-correct standardised measures
if arg.correct
switch arg.effect
case {'cohen','glass'}
factor = exp(gammaln(df/2)-log(sqrt(df/2))-gammaln((df-1)/2));
d = factor.*d;
if nargout > 1
ci = [factor;factor].*ci;
end
end
end
% Arrange results in a matrix if specified
if arg.mat
d = ptvec2mat(d);
if nargout > 1
ci = ptvec2mat(ci);
end
if nargout > 2
df = ptvec2mat(df);
sd = ptvec2mat(sd);
end
end
% Store statistics in a structure
if nargout > 2
stats = struct('df',df,'sd',sd);
end