-
Notifications
You must be signed in to change notification settings - Fork 2
/
ltmlp_transform.m
114 lines (95 loc) · 3.61 KB
/
ltmlp_transform.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
function net = ltmlp_transform(net, input)
% LTMLP_TRANSFORM  Re-estimate the per-unit transformations of the nonlinearities.
%
%   net = ltmlp_transform(net, input)
%
% Transform nonlinear functions such that both the average output and
% average derivative of the output are zeroes, and compensate the change
% in the biases and weights of the later layers.
%
% Inputs:
%   net   - struct; the fields read here are layers, bias, W, layertypes,
%           nonlintrans and options (verbose, num_transf, gammatype).
%   input - data matrix with one sample per column (the bias is replicated
%           size(input,2) times along the second dimension).
%
% Output:
%   net   - the same struct with X, Y, bias, W and nonlintrans overwritten
%           by the values computed here.
%
% NOTE(review): nonlintrans{l} is treated as an [units x 3] matrix. Judging
% from the "alpha and beta" / "gamma" comments below, columns 1..3 are
% presumably alpha (linear term), beta (offset) and gamma (scale); the exact
% meaning is defined by the helper nonlin(), which is not visible here.
% NOTE(review): the 4th argument of nonlin() appears to select the
% derivative order (0 = value, 1 = derivative) -- confirm against nonlin.m.

nlayers = numel(net.layers);
X = cell(nlayers,1);
Y = cell(nlayers,1);
bias = net.bias;
W = net.W;
nonlintypes = net.layertypes;
nonlintrans = net.nonlintrans;
dlen = size(input,2);
opt = net.options;

% Only these nonlinearity types get transformed; all others are left as is.
is_transformed = @(t) any(strcmp(t, {'tanh', 'softsign', 'rect'}));

Y{1} = input;

if opt.verbose == 4
    % contains variances of f(b_i x_t) and f'(b_i x_t) for each layer
    % (stays Inf for rows that are never filled in, e.g. when
    % opt.num_transf <= 2 and the second pass below is skipped)
    nonlinoutput_vars = ones(nlayers,2)*Inf;
end

% ---- Pass 1: update columns 1 and 2 of nonlintrans -----------------------
for l = 2:nlayers
    % Forward propagation: layer input = bias + sum of all incoming
    % connections (including shortcuts from any earlier layer).
    X{l} = repmat(bias{l}, [1 dlen]);
    for ll = 1:l-1
        if ~isempty(W{l,ll})
            X{l} = X{l} + W{l,ll} * Y{ll};
        end
    end

    if is_transformed(nonlintypes{l-1})
        oldtrans = nonlintrans{l};
        nunits = size(nonlintrans{l},1);
        % Transformation [0 0 1] per unit, i.e. evaluate nonlin() with the
        % first two columns zeroed and the third set to one.
        plain = [zeros(nunits,2) ones(nunits,1)];

        nonlintrans{l}(:,1) = -mean(nonlin(X{l}, nonlintypes{l-1}, plain, 1), 2);
        if opt.num_transf > 1
            nonlintrans{l}(:,2) = -mean(nonlin(X{l}, nonlintypes{l-1}, plain, 0) ...
                + bsxfun(@times, nonlintrans{l}(:,1), X{l}), 2);
        end

        % Change in columns 1 and 2, scaled by column 3.
        dalpha = (oldtrans(:,1) - nonlintrans{l}(:,1)) .* nonlintrans{l}(:,3);
        dbeta  = (oldtrans(:,2) - nonlintrans{l}(:,2)) .* nonlintrans{l}(:,3);

        % Compensate alpha and beta by updating the shortcut weights
        for lll = l+1:nlayers
            if ~isempty(W{lll,l})
                bias{lll} = bias{lll} + W{lll,l} * (bsxfun(@times, dalpha, bias{l}) + dbeta);
                for ll = 1:l-1
                    % Skip pairs where layer l has no connection from layer
                    % ll: there is nothing to compensate, and multiplying by
                    % an empty W{l,ll} would raise a dimension error.
                    if ~isempty(W{lll,ll}) && ~isempty(W{l,ll})
                        W{lll,ll} = W{lll,ll} + bsxfun(@times, W{lll,l}, dalpha') * W{l,ll};
                    end
                end
            end
        end
    end

    Y{l} = nonlin(X{l}, nonlintypes{l-1}, nonlintrans{l});
end

% ---- Pass 2: update column 3 of nonlintrans (only if num_transf > 2) -----
if opt.num_transf > 2
    for l = 2:nlayers
        % Recompute the layer input with the biases/weights from pass 1.
        X{l} = repmat(bias{l}, [1 dlen]);
        for ll = 1:l-1
            if ~isempty(W{l,ll})
                X{l} = X{l} + W{l,ll} * Y{ll};
            end
        end

        if is_transformed(nonlintypes{l-1})
            oldgamma = nonlintrans{l}(:,3);
            % Current columns 1:2 with column 3 forced to one.
            unit_gamma = [nonlintrans{l}(:,1:2) ones(size(nonlintrans{l},1),1)];

            if opt.gammatype == 1
                % (mean(f.^2) .* mean(f'.^2) + 1e-4) .^ (-1/4)
                msq_f  = mean(nonlin(X{l}, nonlintypes{l-1}, unit_gamma, 0).^2, 2);
                msq_df = mean(nonlin(X{l}, nonlintypes{l-1}, unit_gamma, 1).^2, 2);
                nonlintrans{l}(:,3) = (msq_f .* msq_df + 1e-4) .^ (-0.25);
            elseif opt.gammatype == 2
                % (mean(f.^2) + 1e-2) .^ (-1/2)
                msq_f = mean(nonlin(X{l}, nonlintypes{l-1}, unit_gamma, 0).^2, 2);
                nonlintrans{l}(:,3) = (msq_f + 1e-2) .^ (-0.5);
            else
                % Column 3 is left unchanged in this case. The original call
                % passed an undefined variable as first argument and
                % therefore errored instead of warning.
                warning('ltmlp_transform:badGammatype', ...
                    'Bad gammatype parameter (opt.gammatype) value')
            end

            % Compensate for gamma: rescale each outgoing weight column by
            % old/new so that W * Y{l} is preserved.
            gamma_ratio = (oldgamma ./ nonlintrans{l}(:,3))';
            for lll = l+1:nlayers
                if ~isempty(W{lll,l})
                    W{lll,l} = bsxfun(@times, W{lll,l}, gamma_ratio);
                end
            end
        end

        Y{l} = nonlin(X{l}, nonlintypes{l-1}, nonlintrans{l});

        if opt.verbose == 4
            df = nonlin(X{l}, nonlintypes{l-1}, nonlintrans{l}, 1);
            nonlinoutput_vars(l,:) = [var(Y{l}(:)) var(df(:))];
        end
    end
end

if opt.verbose == 4
    fprintf('\nVariances of f() and df() on layer ')
    for l = 2:nlayers
        if is_transformed(nonlintypes{l-1})
            fprintf('%d: %.2f (%.2f), ', l, nonlinoutput_vars(l,1), nonlinoutput_vars(l,2));
        end
    end
    fprintf('\n')
end

net.X = X;
net.Y = Y;
net.bias = bias;
net.W = W;
net.nonlintrans = nonlintrans;