/
PED_permute_classifications.m
95 lines (89 loc) · 4.35 KB
/
PED_permute_classifications.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
function permuted_classifications = ...
PED_permute_classifications(Y, n_permutations)
% PED_permute_classifications
% Last edited March 12, 2014
% Samuel Clamons
%
% Computes permutations of a classification vector of 0s and 1s.
%
% This function uses different algorithms for calculating permutations
% for different numbers of permutations. For small classification
% vectors, uses an algorithm based on generating all possible
% combinations of samples from each condition to avoid
% double-selection. If the classification vector is large, instead
% randomly picks permutations.
%
% Parameters:
% Y - Original vector of classifications for an
% experiment. Must consist entirely of 0s and 1s.
% n_permutations - The number of permutations to create.
%
% Returns: A matrix wherein each column is a unique permutation of the
% classification vector.
n_samples = length(Y);
for i = 1:n_samples
if Y(i) ~= 0 && Y(i) ~= 1
error(['in permute_classifications: Y(%d)=%d, should be',...
' 0 or 1'], i, Y(i));
end
end
group_1 = find(Y);
group_2 = setdiff(1:n_samples, group_1);
n_swapped = min(...
floor(length(group_1)/2), ...
floor(length(group_2)/2)...
);
% For relatively small numbers of samples, list out every possible
% combination of samples from each condition, then choose two to swap
% to create each permutation.
if n_samples < 12
% List all of the permutations (technically combinations) of
% samples from each group. Each permutation is a swappable set of
% samples. Choosing two such sets and swapping their vales creates
% a permuted experiment.
all_permutations_1 = nchoosek(group_1, floor(length(group_1)/2));
all_permutations_2 = nchoosek(group_2, floor(length(group_2)/2));
% Encode each pair of sets of samples to swap as a base-D index,
% where D is the number of samples swapped, the least significant
% digit of the index is the index of the set in the first group to
% swap, and the most significant digit of the index is the index of
% the set in the second group to swap.
%
% Pick 'n_permutations' distinct permutations. For each
% permutation, swap the corresponding indices of Y.
base_1 = length(all_permutations_1);
base_2 = length(all_permutations_2);
n_permutations = min(base_1 * base_2, n_permutations);
permutations = randperm(base_1 * base_2, n_permutations);
permuted_classifications = zeros(n_samples, n_permutations);
for i = 1:n_permutations
% Indices run from 0 to base^2-1, but arrays run from 1
% to base^2. Thus the egregious adding of 1.
coded_indices = permutations(i);
index_1 = mod(coded_indices, base_1) + 1;
coded_indices = floor((coded_indices - index_1 + 1) / base_2);
index_2 = mod(coded_indices, base_2) + 1;
swap_indices_1 = all_permutations_1(index_1,:);
swap_indices_2 = all_permutations_2(index_2,:);
new_classifications = Y;
new_classifications(swap_indices_1) = Y(swap_indices_2);
new_classifications(swap_indices_2) = Y(swap_indices_1);
permuted_classifications(:, i) = new_classifications';
end
else
% For larger sample sizes, can't reasonably enumerate every
% possibility. Instead, pick n_permutations at random -- at this point,
% it is very unlikely that any identical permutations will be picked
% unless you are making a LOT of permuted classifications (on the order
% of (n_swapped!)).
permuted_classifications = zeros(n_permutations);
for i = 1:n_permutations
swap_indices_1 = group_1(randperm(length(group_1), n_swapped));
swap_indices_2 = group_2(randperm(length(group_2), n_swapped));
new_classifications = Y;
new_classifications(swap_indices_1) = Y(swap_indices_2);
new_classifications(swap_indices_2) = Y(swap_indices_1);
permuted_classifications(:, i) = new_classifications';
end
end
end