-
Notifications
You must be signed in to change notification settings - Fork 0
/
submod_alg.py
342 lines (303 loc) · 12.1 KB
/
submod_alg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
import numpy as np
# NOTE: may not be fast - need to improve by using edge indices after
# finding the subproblem
#---------------------- General Utilities ------------------------#
# Clustering energy
def energy(V,E,w,labels):
"""
Calculates the weight of hyperedges cut by a clustering
Args:
V (list of int): list of vertices
E (list of list of int): list of hyperedges (subsets of V)
w (list of float): corresponding weights of edges in E
labels (dict of int: int): clustering labels for each v in V
Returns:
energy (float): sum of weight of edges cut by clustering labels
"""
energy = 0.
for i in range(len(E)):
allSame = True
label0 = labels[E[i][0]]
for v in E[i][1:]:
if labels[v] != label0:
allSame = False
break
if allSame == False:
energy += w[i]
return energy
# Generalized submodular function that captures a-b swaps and a-exps
def submodEval(x,E0,E1,w0,w1):
"""
Evaluates the general submodular function
Args:
x (dict of int: float): probability each v in V is set to 1
E0 (list of list of int): list of hyperedges that are not cut iff all v in e are 0 or 1
E1 (list of list of int): list of hyperedges that are not cut iff all v in e are 1
w0 (list of float): corresponding weights of edges in E0
w1 (list of float): corresponding weights of edges in E1
Returns:
sumCut (float): sum of weight of edges cut by x in expectation
"""
sumCut = 0.
for i in range(len(E0)):
probAllZero = 1.0
probAllOne = 1.0
for v in E0[i]:
probAllZero = probAllZero*(1-x[v])
probAllOne = probAllOne*(x[v])
sumCut += w0[i]*(1-probAllZero-probAllOne)
for i in range(len(E1)):
probAllOne = 1.0
for v in E1[i]:
probAllOne = probAllOne*(x[v])
sumCut += w1[i]*(1-probAllOne)
return sumCut
#--------------------- Alpha Exp --------------------------------#
# Create the general submodular problem for an alpha-expansion
def AExpSubproblem(VOrig,EOrig,wOrig,labels,alpha):
"""
Creates the submodular problem corresponding to an alpha-expansion
Args:
VOrig (list of int): list of vertices
EOrig (list of list of int): list of hyperedges (subsets of VOrig)
wOrig (list of float): corresponding weights of edges in EOrig
labels (dict of int: int): clustering labels for each v in VOrig
alpha (int): cluster chosen for an alpha-expansion
Returns:
E0 (list of list of int): list of hyperedges that are not cut by labels and not between alphas
w0 (list of float): corresponding weights of edges in E0
E1 (list of list of int): list of hyperedges that are cut by labels
w1 (list of float): corresponding weights of edges in E1
"""
E0, w0, E1, w1 = [], [], [], []
for i in range(len(EOrig)):
e = EOrig[i]
label0 = labels[e[0]]
allSame = True
for v in e[1:]:
if labels[v] != label0:
allSame = False
break
if allSame and label0 != alpha:
E0.append(e)
w0.append(wOrig[i])
elif allSame == False:
E1.append(e)
w1.append(wOrig[i])
return E0, w0, E1, w1
# Update labels given submodular problem output for alpha-expansion
def AExpUpdate(labels,x,alpha):
"""
Update labels given submodular problem output for alpha-expansion
Args:
labels (dict of int: int): clustering labels for each v in V
x (dict of int: float): probability each v in V is set to 1 in the submod prob
alpha (int): cluster chosen for an alpha-expansion
Returns:
newLabels (dict of int: int): updated clustering labels for each v in V
"""
newLabels = dict()
for v in labels.keys():
if x[v] == 1:
newLabels[v] = alpha
else:
newLabels[v] = labels[v]
return newLabels
#--------------------- AB Swaps --------------------------------#
# Create the general submodular problem for an alpha-beta swap
def ABSubproblem(VOrig,EOrig,wOrig,labels,alpha,beta):
"""
Creates the submodular problem corresponding to an alpha-beta swap
Args:
VOrig (list of int): list of vertices
EOrig (list of list of int): list of hyperedges (subsets of VOrig)
wOrig (list of float): corresponding weights of edges in EOrig
labels (dict of int: int): clustering labels for each v in VOrig
alpha (int): first cluster chosen for the alpha-beta swap
beta (int): second cluster chosen for the alpha-beta swap
Returns:
V (list of int): list of vertices labeled alpha or beta
E0 (list of list of int): list of hyperedges that only involve V
w0 (list of float): corresponding weights of edges in E
"""
V = []
for v in VOrig:
if (labels[v] == alpha) or (labels[v] == beta):
V.append(v)
E0, w0 = [], []
for i in range(len(EOrig)):
e = EOrig[i]
keepE = True
for v in e:
if (labels[v] != alpha) and (labels[v] != beta):
keepE = False
break
if keepE:
E0.append(e)
w0.append(wOrig[i])
return V, E0, w0
# Update labels given submodular problem output for alpha-beta swap
def ABUpdate(labels,x,alpha,beta):
"""
Update labels given submodular problem output for alpha-beta swap
Args:
labels (dict of int: int): clustering labels for each v in V
x (dict of int: float): probability each v in V is set to 1 in the submod prob
alpha (int): first cluster chosen for an alpha-expansion
beta (int): second cluster chosen for an alpha-beta swap
Returns:
newLabels (dict of int: int): updated clustering labels for each v in V
"""
newLabels = dict()
for v in labels.keys():
if labels[v] != alpha and labels[v] != beta:
newLabels[v] = labels[v]
elif x[v] == 1:
newLabels[v] = alpha
else:
newLabels[v] = beta
return newLabels
#--------------------- Submod Alg --------------------------------#
# Find the best alpha-beta swap or alpha-expansion by solving the corresponding submodular
# problem using the Buchbinder et al algorithm
def submodAlg(VOrig,EOrig,wOrig,labels,moveType,alpha,beta=None):
"""
Find the best alpha-beta swap or alpha-expansion by solving the corresponding submodular
problem using the Buchbinder et al algorithm
Args:
VOrig (list of int): list of vertices
EOrig (list of list of int): list of hyperedges (subsets of VOrig)
wOrig (list of float): corresponding weights of edges in EOrig
labels (dict of int: int): clustering labels for each v in VOrig
alpha (int): first cluster chosen for the alpha-beta swap
beta (int): second cluster chosen for the alpha-beta swap
moveType (str): 'ab' indicates alpha-beta swap, 'aexp' indicates alpha-expansion
Returns:
newLabels (dict of int: int): updated clustering labels for each v in VOrig
"""
if moveType == 'aexp':
V = VOrig
E0,w0,E1,w1 = AExpSubproblem(VOrig,EOrig,wOrig,labels,alpha)
else:
V,E0,w0 = ABSubproblem(VOrig,EOrig,wOrig,labels,alpha,beta)
E1, w1 = [], []
x, y = dict(), dict()
for v in V:
if moveType == 'aexp' and labels[v] == alpha:
x[v] = 1
else:
x[v] = 0
y[v] = 1
# Get probabilities
for v in V:
# If v is already set, skip this iteration
if x[v] == 1:
continue
# Calc current values
cutX = submodEval(x,E0,E1,w0,w1)
cutY = submodEval(y,E0,E1,w0,w1)
# Calc difference of changing x or y's value for v
x[v], y[v] = 1, 0
newCutX = submodEval(x,E0,E1,w0,w1)
newCutY = submodEval(y,E0,E1,w0,w1)
tv = newCutX - cutX
fv = newCutY - cutY
# Update value for v
if tv <= 0:
pv = 0
elif fv <= 0:
pv = 1
else:
pv = tv/(tv+fv)
x[v], y[v] = pv, pv
# Derandomize x
for v in V:
if x[v] == 1:
continue
x[v] = 1
cut1 = submodEval(x,E0,E1,w0,w1)
x[v] = 0
cut0 = submodEval(x,E0,E1,w0,w1)
if cut1 >= cut0:
x[v] = 1
# Get new labels
if moveType == 'aexp':
newLabels = AExpUpdate(labels,x,alpha)
else:
newLabels = ABUpdate(labels,x,alpha,beta)
return newLabels
#--------------------- Local Search --------------------------------#
# Run local search using alpha-beta swaps of alpha-expansions
def localSearchSubmod(V,E,w,k,origLabels,moveType = 'ab',moveSequence=None,maxIt=1000):
"""
Approximately solve the Max-K-Hypercut problem represented by V,E,w using a submodular maximization
Args:
V (list of int): list of vertices
E (list of list of int): list of hyperedges (subsets of V)
w (list of float): corresponding weights of edges in E
k (int): number of clusters
origLabels (dict of int: int): clustering labels for each v in V
moveType (str): 'ab' indicates alpha-beta swap, 'aexp' indicates alpha-expansion
maxIt (int): maximum number of iterations run
Returns:
labels (dict of int: int): updated clustering labels for each v in V
"""
assert moveType in ("ab", "aexp", "aebs", "aexp-ran", "aexp-2b")
if moveSequence == None:
if moveType == 'ab':
moveSequence = [[i,j] for i in range(k) for j in range(i+1,k)]
else:
moveSequence = range(k)
labels = origLabels.copy()
it, maxEne = 0, energy(V,E,w,labels)
updated = True
while updated and it <= maxIt:
updated = False
for move in moveSequence:
if moveType == 'ab':
alpha,beta = move[0], move[1]
#print("Swapping (%d, %d), Current Class.: %s" % (alpha, beta, labels))
newLabels = submodAlg(V,E,w,labels,moveType,alpha,beta)
elif moveType == 'aexp':
alpha = move
#print("Expanding (%d), Current Class.: %s" % (alpha, labels))
newLabels = submodAlg(V,E,w,labels,moveType,alpha)
elif moveType == 'aebs':
alpha,beta = move[0],move[1]
labelsTemp = labels.copy()
for v in V:
if labelsTemp[v] == alpha:
labelsTemp[v] = beta
newLabels = submodAlg(V,E,w,labelsTemp,'aexp',alpha)
elif moveType == 'aexp-ran':
alpha = move
labelsTemp = labels.copy()
# Relabel those with label alpha randomly before alpha exp
for v in V:
while (labelsTemp[v] == alpha):
labelsTemp[v] = np.random.randint(0,k)
newLabels = submodAlg(V,E,w,labelsTemp,'aexp',alpha)
elif moveType == 'aexp-2b':
alpha = move
labelsTemp = labels.copy()
# Relabel those with label alpha to other best option before alpha-exp
for v in V:
if labelsTemp[v] == alpha:
best_label, best_ene = alpha, 0
for i in range(k):
if i == alpha:
continue
labelsTemp[v] = i
ene = energy(V,E,w,labelsTemp)
if ene < best_ene:
labelsTemp[v] = best_label
else:
best_label, best_ene = i, ene
newLabels = submodAlg(V,E,w,labelsTemp,'aexp',alpha)
ene = energy(V,E,w,newLabels)
if ene > maxEne:
maxEne = ene
labels = newLabels
updated = True
it += 1
return labels, it