forked from gamaleldin/rand_tensor
/
utils.py
230 lines (187 loc) · 8.37 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import numpy as np
from itertools import chain
def kron_mvprod(As, b):
x = b
numDraws = b[0, :].size
CTN = b[:, 0].size
for d in range(len(As)):
A = As[d]
Gd = A[:,0].size
X = np.reshape(x, tuple([Gd, CTN*numDraws/Gd]), order = 'F')
Z = np.dot(A, X).T
x = np.reshape(Z, tuple([CTN,numDraws]), order = 'F')
x = np.reshape(x, tuple([CTN*numDraws, 1]), order = 'F')
x = np.reshape(x, tuple([numDraws, CTN]), order = 'F').T
return x
def diagKronSum(Ds):
nmodes = len(Ds)
kronSumDs = np.zeros(1)
for i in range(nmodes):
kronSumDs = np.add(np.outer(kronSumDs, np.ones(len(Ds[i]))),np.outer(np.ones(len(kronSumDs)),Ds[i]))
kronSumDs = np.hstack(kronSumDs.T)
return kronSumDs
def sumTensor(T, sumDim):
sumT = T
for i in range(len(sumDim)):
sumT = np.expand_dims(np.sum(sumT, sumDim[i]), axis = sumDim[i])
return sumT
def first_moment(tensor):
sizes = np.array(tensor.shape)
nmodes = len(sizes)
tensorIxs = range(nmodes)
tensor0 = tensor
for x in tensorIxs:
nxSet = list(set(tensorIxs).difference(set([x])))
mu = sumTensor(tensor0, nxSet)/np.prod(sizes[nxSet])
tensor0 = tensor0 - mu
return tensor-tensor0
def second_moments(tensor):
sizes = np.array(tensor.shape)
nmodes = len(sizes)
tensorIxs = range(nmodes)
sec_moments = []
for x in tensorIxs:
nxSet = list(set(tensorIxs) - set([x]))
z = np.reshape(np.transpose(tensor, list(chain.from_iterable([[x], nxSet]))), tuple([sizes[x], np.prod(sizes[nxSet])]), order = 'F')
sec_moments.append(np.dot(z,z.T))
return sec_moments
def empiricalMean(tensors):
""" Compute the mean tensor from a list of tensors specified as an input
"""
nsamples = len(tensors);
M = tensors[0]/nsamples
for i in range(nsamples-1):
M = M+tensors[i+1]/nsamples
return M
def empiricalMargCovs(tensors):
"""Compute the marginal covariances empirically a list of tensors specified as an input
the code gives 2nd order statistic if n_samples = 1
"""
nsamples = len(tensors)
M = empiricalMean(tensors)
sizes = np.array(tensors[0].shape)
nmodes = len(sizes)
allDim = range(nmodes)
estMargCov = []
for i in allDim:
estMargCov.append(np.zeros([sizes[i], sizes[i]]))
for j in range(nsamples):
tensor = tensors[j]-M
for i in allDim:
niSet = list(set(allDim) - set([i]))
z = np.reshape(np.transpose(tensor, list(chain.from_iterable([[i], niSet]))), tuple([sizes[i], np.prod(sizes[niSet])]), order = 'F')
estMargCov[i] = (estMargCov[i]+(np.dot(z,z.T)/max(1., nsamples-1.)))
for i in allDim:
estMargCov[i] = (estMargCov[i])
return estMargCov
def minimize(X, obj_Fn, length, params):
supressOutPut = True;
INT = 0.1; # don't reevaluate within 0.1 of the limit of the current bracket
EXT = 3.0; # extrapolate maximum 3 times the current step-size
MAX = 20; # max 20 function evaluations per line search
RATIO = 10.0; # maximum allowed slope ratio
SIG = 0.1;
RHO = SIG/2.0; # SIG and RHO are the constants controlling the Wolfe-
# Powell conditions. SIG is the maximum allowed absolute ratio between
# previous and new slopes (derivatives in the search direction), thus setting
# SIG to low (positive) values forces higher precision in the line-searches.
# RHO is the minimum allowed fraction of the expected (from the slope at the
# initial point in the linesearch). Constants must satisfy 0 < RHO < SIG < 1.
# Tuning of SIG (depending on the nature of the function to be optimized) may
# speed up the minimization; it is probably not worth playing much with RHO.
realmin = np.finfo(np.double).tiny
def unwrap(s):
l = s.shape
v = np.reshape(s, np.prod(l), order = 'F')
return v
def rewrap(s, v):
l = s.shape;
v = np.reshape(v, l, order = 'F')
return v
S='Linesearch'
i = 0 # zero the run length counter
ls_failed = False # no previous line search has failed
f0, df0 = obj_Fn(X, params) # get function value and gradient
Z = X;X = unwrap(X); df0 = unwrap(df0);
if not supressOutPut:
print "%s %6i; Value %4.6e\r" %(S, i, f0)
fX = []
fX.append(f0);
s = -df0;
d0 = -np.dot(s.T,s) # initial search direction (steepest) and slope
x3 = 1.0/(1.0-d0)
while i < length: # while not finished
i = i + 1 # count iterations
X0 = X; F0 = f0; dF0 = df0; # make a copy of current values
M = MAX;
while True: # keep extrapolating as long as necessary
x2 = 0; f2 = f0; d2 = d0; f3 = f0; df3 = df0;
success = False;
while (not success) and (M > 0):
try:
M = M - 1.0; i = i + 1; # count epochs
f3, df3 = obj_Fn(rewrap(Z, X+x3*s), params);
df3 = unwrap(df3);
if np.isnan(f3) or np.isinf(f3) or any(np.isnan(df3)+np.isinf(df3)):
print "error(' ')"
success = True;
except: # catch any error which occured in f
x3 = (x2+x3)/2; # bisect and try again
if f3 < F0:
X0 = X+x3*s; F0 = f3; dF0 = df3; # keep best values
d3 = np.dot(df3.T,s); # new slope
if (d3 > (SIG*d0)) or (f3 > (f0+x3*RHO*d0)) or (M == 0): # are we done extrapolating?
break;
x1 = x2; f1 = f2; d1 = d2; # move point 2 to point 1
x2 = x3; f2 = f3; d2 = d3; # move point 3 to point 2
A = 6*(f1-f2)+3*(d2+d1)*(x2-x1); # make cubic extrapolation
B = 3*(f2-f1)-(2*d1+d2)*(x2-x1);
x3 = x1-d1*(x2-x1)**2/(B+np.sqrt(abs(B*B-A*d1*(x2-x1)))); # num. error possible, ok!
if (not np.isreal(x3)) or np.isnan(x3) or np.isinf(x3) or (x3 < 0.): # num prob | wrong sign?
x3 = x2*EXT; # extrapolate maximum amount
elif x3 > x2*EXT: # new point beyond extrapolation limit?
x3 = x2*EXT; # extrapolate maximum amount
elif x3 < (x2+INT*(x2-x1)): # new point too close to previous point?
x3 = x2+INT*(x2-x1);
### end extrapolation ####
while (abs(d3) > -SIG*d0 or (f3 > (f0+x3*RHO*d0))) and (M > 0): # keep interpolating
if d3 > 0 or f3 > f0+x3*RHO*d0: # choose subinterval
x4 = x3; f4 = f3; d4 = d3; # move point 3 to point 4
else:
x2 = x3; f2 = f3; d2 = d3; # move point 3 to point 2
if f4 > f0:
x3 = x2-(0.5*d2*(x4-x2)**2)/(f4-f2-d2*(x4-x2)); # quadratic interpolation
else:
A = 6*(f2-f4)/(x4-x2)+3*(d4+d2); # cubic interpolation
B = 3*(f4-f2)-(2*d2+d4)*(x4-x2);
x3 = x2+(np.sqrt(B*B-A*d2*(x4-x2)**2)-B)/A; # num. error possible, ok!
if np.isnan(x3) or np.isinf(x3):
x3 = (x2+x4)/2; # if we had a numerical problem then bisect
x3 = max(min(x3, x4-INT*(x4-x2)),x2+INT*(x4-x2)); # don't accept too close
f3, df3 = obj_Fn(rewrap(Z, X+x3*s), params);
df3 = unwrap(df3);
if f3 < F0:
X0 = X+x3*s; F0 = f3; dF0 = df3; # keep best values
M = M - 1.0; i = i + 1; # count epochs
d3 = np.dot(df3.T,s); # new slope
#### end interpolation ####
if (abs(d3) < (-SIG*d0)) and (f3 < (f0+x3*RHO*d0)): # if line search succeeded
X = X+x3*s; f0 = f3; fX.append(f0); # update variables
if not supressOutPut:
print "%s %6i; Value %4.6e\r" %(S, i, f0);
s = np.dot((np.dot(df3.T, df3)-np.dot(df0.T,df3))/(np.dot(df0.T,df0)), s) - df3; # Polack-Ribiere CG direction
df0 = df3; # swap derivatives
d3 = d0; d0 = np.dot(df0.T,s);
if d0 > 0: # new slope must be negative
s = -df0; d0 = -np.dot(s.T,s); # otherwise use steepest direction
x3 = x3 * min(RATIO, d3/(d0-realmin)); # slope ratio but max RATIO
ls_failed = False; # this line search did not fail
else:
X = X0; f0 = F0; df0 = dF0; # restore best point so far
if ls_failed or (i > abs(length)): # line search failed twice in a row
break; # or we ran out of time, so we give up
s = -df0; d0 = -np.dot(s.T,s); # try steepest
x3 = 1.0/(1.0-d0);
ls_failed = True; # this line search failed
X = rewrap(Z,X);
return X, fX, i