/
op_rel_entr_epi_cone.m
219 lines (197 loc) · 7.62 KB
/
op_rel_entr_epi_cone.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
function cvx_optpnt = op_rel_entr_epi_cone(sz,iscplx,m,k,e,apx)
%OP_REL_ENTR_EPI_CONE Operator relative entropy cone
% OP_REL_ENTR_EPI_CONE(sz,iscplx) returns a CVX triple {X,Y,TAU} of
% matrices constrained to satisfy
% X^{1/2} * logm(X^{1/2}*Y^{-1}*X^{1/2}) * X^{1/2} <= TAU
% where:
% * The inequality "<=" is in the positive semidefinite order
% * X,Y,TAU are symmetric (Hermitian if iscplx=1) matrices of size sz
% * logm is the matrix logarithm (in base e)
%
% Usage example:
% variable X(4,4) symmetric
% variable Y(4,4) symmetric
% variable TAU(4,4) symmetric
% {X,Y,TAU} == op_rel_entr_epi_cone(4,0)
%
% Note on parameter sz:
% It is possible to provide for sz an array [sz(1) sz(2) sz(3) ...].
% This will return an array of prod(sz(3:end)) triples, where each
% triple has size sz(1)=sz(2) and is constrained to live in the
% operator relative entropy ocne. Note that sz(1) and sz(2) must be
% equal in this case. See documentation for CVX's "sets/semidefinite.m"
% for more information.
%
% OP_REL_ENTR_EPI_CONE(sz,iscplx,m,k) The additional parameters m
% and k can be used to control the accuracy of the approximation (see
% reference for more details). m is the number of quadrature nodes to use
% and k is the number of square-roots to take. Default (m,k) = (3,3).
%
% OP_REL_ENTR_EPI_CONE(sz,iscplx,m,k,e) If an additional matrix e is
% provided of size nxr (where n=sz(1)), the returned tuple(s) {X,Y,TAU}
% is constrained to satisfy instead:
% e'*(D_{op}(X||Y))*e <= TAU.
% Note that TAU here is of size rxr. The default case corresponds to
% e=eye(n). When r is small compared to n this can be helpful to reduce
% the size of small LMIs from 2nx2n to (n+r)x(n+r).
%
% OP_REL_ENTR_EPI_CONE(sz,iscplx,m,k,e,apx) The last parameter apx
% indicates which approximation of the logarithm to use.
% Possible values:
% - apx = +1: Upper approximation on D_{op} [inner approximation of the
% operator relative entropy cone]
% - apx = -1: Lower approximation on D_{op} [outer approximation of the
% operator relative entropy cone]
% - apx = 0 (Default): Pade approximation (neither upper nor lower) but
% slightly better accuracy than apx = +1 or -1.
%
%AUTHORS
% Hamza Fawzi, James Saunderson and Pablo A. Parrilo
%
%REFERENCE
% This code is based on the paper: "Semidefinite approximations of matrix
% logarithm" by Hamza Fawzi, James Saunderson and Pablo A. Parrilo
if nargin < 1 || nargin > 6
error('Wrong number of arguments');
end
if nargin < 2
iscplx = 0;
end
if nargin == 2
m = 3;
k = 3;
end
if length(sz) == 1
sz = [sz sz];
end
if size(sz,2) == 1
sz = sz';
end
if sz(1) ~= sz(2)
error('The first two elements of sz must be equal');
end
if nargin < 5
e = eye(sz(1));
else
if size(e,1) ~= sz(1)
error('The number of rows of e must be the same as sz(1)');
end
end
r = size(e,2);
if nargin < 6
% By default use Pade approximant
apx = 0;
end
% Note that D_{op}(x,y) is the (operator) perspective of -log, namely
% D_{op}(x,y) = x*(-log)(y/x).
% If f <= log <= F then we get upper and lower bound on D_{op} as follows:
% -x*F(y/x) <= D_{op}(x,y) <= -x*f(y/x)
% This means:
% upper bound on D_{op} <-> lower bound on log
% lower bound on D_{op} <-> upper bound on log
% Compute Gauss-Legendre quadrature nodes and weights on [0,1]
if apx == +1
% Upper bound on D_{op} <-> lower bound on log
% Use Gauss-Radau quadrature with an endpoint at 1.
% Note: m=1 gives the lower bound 1-1/x <= log(x).
[s,w] = gaussradau(m,1);
elseif apx == -1
% Lower bound on D_{op} <-> upper bound on log
% Use Gauss-Radau quadrature with an endpoint at 0. This leads to
% the (m,m-1) Pade approximant of log.
% Note: m=1 gives the upper bound log(x) <= x-1.
[s,w] = gaussradau(m,0);
elseif apx == 0
% Pade approximant: use Gaussian quadrature
[s,w] = glquad(m);
else
error('Unknown value of parameter apx. (Should be either +1, -1, or 0)');
end
% Make sure that w is a column vector
if size(w,1) == 1
w = w';
end
cvx_begin set
if iscplx
variable X(sz) hermitian
variable Y(sz) hermitian
variable Z(sz) hermitian
variable TAU([r r sz(3:end)]) hermitian
variable T([r r sz(3:end) m]) hermitian
else
variable X(sz) symmetric
variable Y(sz) symmetric
variable Z(sz) symmetric
variable TAU([r r sz(3:end)]) symmetric
variable T([r r sz(3:end) m]) hermitian
end
% X #_{1/2^k} Y >= Z
{X,Y,Z} == matrix_geo_mean_hypo_cone(sz,1/(2^k),iscplx,0);
for ii=1:m
sr.type = '()';
sr.subs = repmat({':'},1,length(sz));
sr.subs{length(sz)+1} = ii;
% subsref(T,sr) corresponds to T(:,...,:,ii)
% Also note that we are dividing by w here because it is easier
% to do this than to do sum w_i T(:,...,:,ii) later (cf. line that
% involves TAU)
if s(ii) < 1e-6
% for s=0, x*f_s(z/x) = z - x
e'*(Z-X)*e - subsref(T,sr)/w(ii) == semidefinite([r r sz(3:end)],iscplx);
else
% x f_s(z/x) >= T_i / w_i
[e'*X*e - s(ii)*subsref(T,sr)/w(ii) e'*X;
X*e (1-s(ii))*X+s(ii)*Z] == semidefinite([r+sz(1) r+sz(1) sz(3:end)],iscplx);
end
end
(2^k)*sum(T,length(sz)+1) + TAU == semidefinite([r r sz(3:end)],iscplx);
cvx_end
cvx_optpnt = cvxtuple(struct('X',X,'Y',Y,'TAU',TAU));
end
function [s,w] = glquad(m)
% Compute Gauss-Legendre quadrature nodes and weights on [0,1]
% Code below is from [Trefethen, "Is Gauss quadrature better than
% Clenshaw-Curtis?", SIAM Review 2008] and computes the weights and
% nodes on [-1,1].
beta = .5./sqrt(1-(2*(1:(m-1))).^(-2)); % 3-term recurrence coeffs
T = diag(beta,1) + diag(beta,-1); % Jacobi matrix
[V,D] = eig(T); % eigenvalue decomposition
s = diag(D); % nodes
w = 2*V(1,:).^2; % weights
% Translate and scale to [0,1]
s = (s+1)/2; w = w'/2;
end
function [s,w] = gaussradau(m,endpoint)
% Computes m-point Gauss-Radau quadrature nodes and weights on [0,1]
% endpoint should be either 0 or 1
% We use the modified Golub-Welsch method, explained in the first two
% pages of
% https://www.cs.purdue.edu/homes/wxg/selected_works/section_08/164.pdf
% The nodes are computed as the eigenvalues of the modified Jacobi
% matrix T, where T(m,m) is set to be
% T(m,m) = a - beta(m)*P_{m-1}(a) / P_m(a)
% where P_m are the monic orthogonal polynomial with respect to the
% base measure on [-1,1] (here uniform measure).
% In our case P_m = Legendre polynomials which satisfies
% P_m(1) = 2^m / (2m choose m)
% so that P_m(1) / P_{m-1}(1) = (2m-3)/(m-1).
if m == 1
s = endpoint;
w = 1;
else
beta = .5./sqrt(1-(2*(1:(m-1))).^(-2));
a = sign(endpoint-.5)*(1 - beta(m-1)^2*(2*m-3)/(m-1));
T = diag(beta,1) + diag(beta,-1); % Jacobi matrix
T(m,m) = a;
[V,D] = eig(T); % eigenvalue decomposition
s = diag(D);
w = 2*V(1,:).^2;
% Translate and scale to [0,1]
s = (s+1)/2; w = w'/2;
end
% The following must be true:
% * One of the s_i's is equal to endpoint
% * Furthermore:
% sum_{i=1}^m w_i s_i^j = 1/(j+1) for all j=0,...,2*m-2
% ( i.e., w'*s.^(0:(2*m-2)) == 1./(1+(0:(2*m-2))) )
end