-
Notifications
You must be signed in to change notification settings - Fork 2
/
permuttest.m
256 lines (232 loc) · 8.96 KB
/
permuttest.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
function [t,p,ci,stats,dist] = permuttest(x,m,varargin)
%PERMUTTEST One-sample and paired-sample permutation-based t-test.
% T = PERMUTTEST(X) performs a one-sample permutation test based on the
% t-statistic of the hypothesis that the data in X come from a
% distribution with mean zero, and returns the test statistic. If X is a
% matrix, separate permutation tests are performed along each column of
% X, and a vector of results is returned. If the 'compare' parameter is
% set to 'pairwise', two-tailed permutation tests between every pair of
% columns in X are performed, and a matrix of results is returned.
%
% PERMUTTEST treats NaNs as missing values, and ignores them.
%
% T = PERMUTTEST(X,M) returns the results of a one-sample permutation
% test of the hypothesis that the data in X come from a distribution with
% mean M. M must be a scalar.
%
% T = PERMUTTEST(X,Y) returns the results of a paired-sample permutation
% test between dependent samples X and Y of the hypothesis that the data
% in X and Y come from distributions with equal means. X and Y must have
% the same length. If X and Y are matrices, separate permutation tests
% are performed between each corresponding pair of columns in X and Y,
% and a vector of results is returned.
%
% [T,P] = PERMUTTEST(...) returns the probability (i.e. p-value) of
% observing the given result by chance if the null hypothesis is true. As
% the null distribution is generated empirically by permuting the data,
% no assumption is made about the shape of the distribution that the data
% come from. P-values are automatically adjusted for multiple comparisons
% using the max correction method.
%
% [T,P,CI] = PERMUTTEST(...) returns a 100*(1-ALPHA)% confidence interval
% (CI) for the true mean of X, or of X-Y for a paired test. CIs are also
% adjusted for multiple comparisons using the max correction method.
%
% [T,P,CI,STATS] = PERMUTTEST(...) returns a structure with the following
% fields:
% 'df' -- the degrees of freedom of each test
% 'sd' -- the estimated population standard deviation of X, or
% of X-Y for a paired test
% 'mu' -- the estimated population mean of X, or of X-Y for a
% paired test
%
% [T,P,CI,STATS,DIST] = PERMUTTEST(...) returns the permuted sampling
% distribution of the test statistic.
%
% [...] = PERMUTTEST(...,'PARAM1',VAL1,'PARAM2',VAL2,...) specifies
% additional parameters and their values. Valid parameters are the
% following:
%
% Parameter Value
% 'alpha' A scalar between 0 and 1 specifying the significance
% level as 100*ALPHA% (default=0.05).
% 'dim' A scalar specifying the dimension to work along: pass
% in 1 to work along the columns (default), or 2 to work
% along the rows. Applies to both X and Y.
% 'tail' A string specifying the alternative hypothesis:
% 'both' mean is not M (two-tailed, default)
% 'right' mean is greater than M (right-tailed)
% 'left' mean is less than M (left-tailed)
% 'compare' A string specifying what to compare each variable to
% when only X is entered:
% 'zero' compare each column of X to zero and
% return a vector of results (default)
% 'pairwise' compare every pair of columns in X to
% each other using two-tailed tests and
% return a matrix of results
% 'nperm' An integer scalar specifying the number of permutations
% (default=10,000).
% 'correct' A numeric scalar (0,1) or logical indicating whether
% to control FWER using max correction (default=true).
% 'rows' A string specifying the rows to use in the case of any
% missing values (NaNs):
% 'all' use all rows, even with NaNs (default)
% 'complete' use only rows with no NaNs
% 'seed' An integer scalar specifying the seed value used to
% initialise the permutation generator. By default, the
% generator is initialised based on the current time,
% resulting in a different permutation on each call.
% 'verbose' A numeric or logical specifying whether to execute in
% verbose mode: pass in 1 for verbose mode (default), or
% 0 for non-verbose mode.
%
% See also TTEST PERMUTTEST2 BOOTEFFECTSIZE.
%
% PERMUTOOLS https://github.com/mickcrosse/PERMUTOOLS
% References:
% [1] Crosse MJ, Foxe JJ, Molholm S (2024) PERMUTOOLS: A MATLAB
% Package for Multivariate Permutation Testing. arXiv 2401.09401.
% [2] Blair RC, Higgins JJ, Karniski W, Kromrey JD (1994) A Study of
% Multivariate Permutation Tests Which May Replace Hotelling's T2
% Test in Prescribed Circumstances. Multivariate Behav Res,
% 29(2):141-163.
% [3] Gondan M (2010) A permutation test for the race model
% inequality. Behav Res Methods, 42(1):23-28.
% [4] Groppe DM, Urbach TP, Kutas M (2011) Mass univariate analysis
% of event-related brain potentials/fields I: A critical tutorial
% review. Psychophysiology, 48(12):1711-1725.
% © 2018-2024 Mick Crosse <crossemj@tcd.ie>
% CNL, Albert Einstein College of Medicine, NY.
% TCBE, Trinity College Dublin, Ireland.
if nargin < 2 || isempty(m)
y = [];
elseif isscalar(m)
y = repmat(m,size(x));
elseif ismatrix(m)
y = m;
end
% Parse input arguments
arg = ptparsevarargin(varargin);
% Validate input parameters
ptvalidateparamin(x,y,arg)
% Orient data column-wise
if arg.dim==2 || isrow(x)
x = x';
end
if ~isempty(y) && (arg.dim==2 || isrow(y))
y = y';
end
% Set up comparison
if isempty(y)
switch arg.compare
case 'zero'
y = zeros(size(x));
case 'pairwise'
warning('Comparing all columns of X using two-tailed test...')
[x,y] = ptpaircols(x);
arg.tail = 'both';
arg.mat = true;
end
else
switch arg.compare
case 'pairwise'
error('The PAIRWISE option can only be used with one sample.')
end
end
if size(x)~=size(y)
error('X and Y must be the same size.')
end
% Compute difference between samples
x = x-y;
% Use only rows with no NaN values if specified
switch arg.rows
case 'complete'
x = x(~any(isnan(x),2),:);
end
% Get data dimensions, ignoring NaNs
[maxnobs,nvar] = size(x);
nobs = sum(~isnan(x));
% Compute degrees of freedom
df = nobs-1;
% For efficiency, only omit NaNs if necessary
if any(isnan(x(:)))
nanflag = 'omitmissing';
else
nanflag = 'includemissing';
end
% Compute standard deviation
sd = std(x,nanflag);
% Compute mean difference
mu = sum(x,nanflag)./nobs;
% Compute test statistic
se = sd./sqrt(nobs);
t = mu./se;
if nargout > 1
% Generate random permutations
rng(arg.seed);
signx = sign(rand(maxnobs,arg.nperm)-0.5);
% Estimate sampling distribution
sqrtn = sqrt(nobs.*df);
dist = zeros(arg.nperm,nvar);
for i = 1:arg.nperm
xp = x.*repmat(signx(:,i),1,nvar);
smx = sum(xp,nanflag);
dist(i,:) = smx./nobs./(sqrt(sum(xp.^2)-(smx.^2)./nobs)./sqrtn);
end
% Apply max correction if specified
if arg.correct
dist = max(abs(dist),[],2);
end
% Add negative values
dist(arg.nperm+1:2*arg.nperm,:) = -dist;
arg.nperm = 2*arg.nperm;
if arg.verbose
fprintf('Adding negative of values to permutation distribution.\n')
fprintf('Number of permutations used: %d\n',arg.nperm)
end
% Compute p-value & CI
switch arg.tail
case 'both'
p = 2*(sum(abs(t)<=dist)+1)/(arg.nperm+1);
if nargout > 2
crit = prctile(dist,100*(1-arg.alpha/2)).*se;
ci = [mu-crit;mu+crit];
end
case 'right'
p = (sum(t<=dist)+1)/(arg.nperm+1);
if nargout > 2
crit = prctile(dist,100*(1-arg.alpha)).*se;
ci = [mu-crit;Inf(1,nvar)];
end
case 'left'
p = (sum(t>=dist)+1)/(arg.nperm+1);
if nargout > 2
crit = prctile(dist,100*(1-arg.alpha)).*se;
ci = [-Inf(1,nvar);mu+crit];
end
end
end
% Arrange results in a matrix if specified
if arg.mat
t = ptvec2mat(t);
if nargout > 1
p = ptvec2mat(p);
end
if nargout > 2
ciLwr = ptvec2mat(ci(1,:));
ciUpr = ptvec2mat(ci(2,:));
ci = cat(3,ciLwr,ciUpr);
ci = permute(ci,[3,1,2]);
end
if nargout > 3
df = ptvec2mat(df);
sd = ptvec2mat(sd);
mu = ptvec2mat(mu);
end
end
% Store statistics in a structure
if nargout > 3
stats.df = df;
stats.sd = sd;
stats.mu = mu;
end