Skip to content

Commit

Permalink
Merge pull request #55 from FZJ-IEK3-VSA/kmedoid_contiguity
Browse files Browse the repository at this point in the history
K-medoid contiguity
  • Loading branch information
maximilian-hoffmann committed Sep 20, 2021
2 parents 25c5b39 + 69eb9b2 commit 2afce0e
Show file tree
Hide file tree
Showing 7 changed files with 271 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
# directories
.idea/
.vscode/
__pycache__/
trash/
*.pytest_cache/
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
@@ -1,4 +1,5 @@
sklearn>=0.0
pandas>=0.18.1
numpy>=1.11.0
pyomo>=5.3
pyomo>=5.3
networkx
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -7,7 +7,7 @@

setuptools.setup(
name='tsam',
version='1.1.1',
version='1.1.2',
author='Leander Kotzur, Maximilian Hoffmann',
author_email='l.kotzur@fz-juelich.de, max.hoffmann@fz-juelich.de',
description='Time series aggregation module (tsam) to create typical periods',
Expand Down
4 changes: 2 additions & 2 deletions test/test_k_medoids.py
Expand Up @@ -8,7 +8,7 @@



def test_hierarchical():
def test_k_medoids():

raw = pd.read_csv(os.path.join(os.path.dirname(__file__),'..','examples','testdata.csv'), index_col = 0)

Expand All @@ -35,4 +35,4 @@ def test_hierarchical():


if __name__ == "__main__":
test_hierarchical()
test_k_medoids()
77 changes: 77 additions & 0 deletions test/test_k_medoids_contiguity.py
@@ -0,0 +1,77 @@
import os
import time

import pandas as pd
import numpy as np

from tsam.utils.k_medoids_exact import KMedoids
from tsam.utils.k_medoids_contiguity import k_medoids_contiguity, _contiguity_to_graph

# similarity between node 0, 1 and 2
# 0===1
# \ /
# 2
DISTANCES = np.array([ [0,1,4],
[1,0,5],
[4,5,0],])

# add the adjacency between node 0, 1 and 2
# 0 1
# \ /
# 2 -
adjacency = np.array([ [1,0,1],
[0,1,1],
[1,1,1],])


import networkx as nx

def test_node_cuts():
"""Tests the cutting of the nodes in the adjacency network
"""
G = _contiguity_to_graph(adjacency)
cuts = list(nx.all_node_cuts(G))
# only one cut expected
assert len(cuts)==1



def test_k_medoids_simple():
'''
Checks that k-medoid brings the results as expected without the adjacency constraints
'''

n_clusters = 2
cluster_instance = KMedoids(n_clusters=n_clusters)
r_y, r_x, r_obj = cluster_instance._k_medoids_exact(DISTANCES, n_clusters)

labels_raw = r_x.argmax(axis=0)

# check that node 0 and node 1 are in the same cluster
assert labels_raw[0] == labels_raw[1]
# check that node 1 and node 2 are in a different cluster
assert labels_raw[1] != labels_raw[2]
# check that the error/objective is the distance between node 0 and 1
assert r_obj == 1



def test_k_medoids_simple_contiguity():
'''
Checks if the adjacency constraint holds true
'''

n_clusters = 2
r_y, r_x, r_obj = k_medoids_contiguity(DISTANCES, n_clusters, adjacency)
labels_raw = r_x.argmax(axis=0)
# check that node 0 and node 2 are in the same cluster
assert labels_raw[0] == labels_raw[2]
# check that node 0 and node 1 are in a different cluster
assert labels_raw[0] != labels_raw[1]
# check that the error/objective is the distance between node 0 and 2
assert r_obj == 4


if __name__ == "__main__":
test_k_medoids_simple()
test_k_medoids_simple_contiguity()
118 changes: 118 additions & 0 deletions tsam/utils/k_medoids_contiguity.py
@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-

import numpy as np
import time
import pyomo.environ as pyomo
import pyomo.opt as opt
import networkx as nx
from tsam.utils.k_medoids_exact import _setup_k_medoids, KMedoids, _solve_given_pyomo_model


#class KMedoids_contiguity(KMedoids):


def k_medoids_contiguity(distances, n_clusters, adjacency, max_iter=500, solver='glpk'):
'''Declares a k-medoids model and iteratively adds cutting planes to hold on adjacency/contiguity
The algorithm is based on: Oehrlein and Hauner (2017): A cutting-plane method for adjacency-constrained spatial aggregation
'''
# First transform the network to a networkx instance which is required for cut generation
G = _contiguity_to_graph(adjacency, distances=distances)

# check if inputs are correct
np.size(distances) == np.size(adjacency)

# and test for connectivity
if not nx.is_connected(G):
raise ValueError("The give adjacency matrix is not connected.")

# Initial setup of k medoids
M = _setup_k_medoids(distances, n_clusters)

M.adjacency = adjacency

# Add constraintlist for the cuts later added
M.cuts = pyomo.ConstraintList()


# Loop over the relaxed k-medoids problem and add cuts until the problem fits
_all_cluster_connected=False
_iter = 0
_cuts_added=[]
while not _all_cluster_connected and _iter < max_iter:
# first solve instance
t_presolve = time.time()
print(str(_iter) + " iteration: Solving instance")
r_x, r_y, obj = _solve_given_pyomo_model(M, solver=solver)
t_aftersolve = time.time()
print("Total distance: " + str(obj) + " with solving time: " + str(t_aftersolve - t_presolve))

candidates ,labels = np.where(r_x == 1)
# claim that the resulting clusters are connected
_all_cluster_connected=True
_new_cuts_added=[]
for label in np.unique(labels):
# extract the cluster
cluster = G.subgraph(np.where(labels == label)[0])
# Identify if the cluster is contineous, instead of validating the constraints such as Validi and Oehrlein.
if not nx.is_connected(cluster):
_all_cluster_connected=False
# if not add contiguity constraints based on c-v (Oehrlein) o a-b (Validi) separators
for candidate in cluster.nodes:
# It is not clear in Validi and Oehrlein, if cuts between all cluster candidates or just the center and the candidates shall be made. The latter one does not converge for the test system wherefore the first one is chosen.
for node in cluster.nodes:
# different to Validi et al. (2021) and Oehrlein and Haunert (2017), check first and just add continuity constraints for the not connected candidates to increase performance
if nx.node_connectivity(cluster, node, candidate) == 0:
# check that the cut was not added so far for the cluster
if (label, candidate, node) not in _cuts_added:
# include the cut in the cut list
_new_cuts_added.append((label,candidate,node))
# Cuts to Separators - Appendix A Minimum-weight vertex separators (Oehrlein and Haunert, 2017)
# Validi uses an own cut generator and Oehrlein falls back to a Java library, here we use simple max flow cutting
# TODO: Check performance for large networks
cut_set = nx.minimum_node_cut(G,node,candidate)
# (Eq. 13 - Oehrlein and Haunert, 2017)
M.cuts.add( sum( M.z[u, node] for u in cut_set) >= M.z[candidate, node] )
else:
raise ValueError("Minimal cluster,candidate separation/minimum cut does not seem sufficient. Adding additional separators is could help.")
# Total cuts
_cuts_added.extend(_new_cuts_added)
_iter += 1
t_afteradding = time.time()

print(str(len(_new_cuts_added)) + " contiguity constraints/cuts added, adding to a total number of " + str(len(_cuts_added)) + " cuts within time: " + str(t_afteradding - t_aftersolve))

labels = np.where(r_x == 1)

return (r_y, r_x.T, obj)




def _contiguity_to_graph(adjacency, distances=None):
"""Transforms a adjacency matrix to a networkx.Graph
Args:
adjacency (np.ndarray): 2-diimensional adjacency matrix
distances (np.ndarray, optional): If provided, delivers the distances between the nodes. Defaults to None.
Returns:
nx.Graph: Graph with every index as node name.
"""
rows, cols = np.where(adjacency == 1)
G = nx.Graph()
if distances is None:
edges = zip(rows.tolist(), cols.tolist())
G.add_edges_from(edges)
else:
normed_distances=distances/np.max(distances)
weights=1-normed_distances
if np.any(weights<0) or np.any(weights>1):
raise ValueError("Weight calculation went wrong.")

edge_weights=weights[rows,cols]
edges = zip(rows.tolist(), cols.tolist(), edge_weights.tolist())
G.add_weighted_edges_from(edges)
return G


119 changes: 70 additions & 49 deletions tsam/utils/k_medoids_exact.py
Expand Up @@ -129,69 +129,90 @@ def _k_medoids_exact(self, distances, n_clusters):
n_clusters : int, required
Number of clusters.
"""
# Create model
M=pyomo.ConcreteModel()

# Create pyomo model
M = _setup_k_medoids(distances, n_clusters)

# get distance matrix
M.d = distances
# And solve
r_x, r_y, r_obj = _solve_given_pyomo_model(M, solver=self.solver)

# set number of clusters
M.no_k = n_clusters
return (r_y, r_x.T, r_obj)

# Distances is a symmetrical matrix, extract its length
length = distances.shape[0]
def _setup_k_medoids(distances, n_clusters):
'''Define the k-medoids model with pyomo.
In the spatial aggregation community, it is referred to as Hess Model for political districting
with an additional constraint of cluster-sizes/populations.
(W Hess, JB Weaver, HJ Siegfeldt, JN Whelan, and PA Zitlau. Nonpartisan political redistricting by computer. Operations Research, 13(6):998–1006, 1965.)
'''
# Create model
M=pyomo.ConcreteModel()

# get indices
M.i = [j for j in range(length)]
M.j = [j for j in range(length)]
# get distance matrix
M.d = distances

# initialize vars
M.z = pyomo.Var(M.i, M.j, within=pyomo.Binary)
M.y = pyomo.Var(M.i, within=pyomo.Binary)
# set number of clusters
M.no_k = n_clusters

# Distances is a symmetrical matrix, extract its length
length = distances.shape[0]

# get objective
def objRule(M):
return sum(sum(M.d[i,j] * M.z[i,j] for j in M.j) for i in M.i)
M.obj=pyomo.Objective(rule=objRule)
# get indices
M.i = [j for j in range(length)]
M.j = [j for j in range(length)]

# s.t.
# Assign all candidates to clusters
def candToClusterRule(M,j):
return sum(M.z[i,j] for i in M.i) == 1
M.candToClusterCon = pyomo.Constraint(M.j, rule=candToClusterRule)
# initialize vars
# Decision every candidate to every possible other candidate as cluster center
M.z = pyomo.Var(M.i, M.j, within=pyomo.Binary)

# no of clusters
def noClustersRule(M):
return sum(M.y[i] for i in M.i) == M.no_k
M.noClustersCon = pyomo.Constraint(rule=noClustersRule)

# cluster relation
def clusterRelationRule(M,i,j):
return M.z[i,j] <= M.y[i]
M.clusterRelationCon = pyomo.Constraint(M.i,M.j, rule=clusterRelationRule)
# get objective
# Minimize the distance of every candidate to the cluster center
def objRule(M):
return sum(sum(M.d[i,j] * M.z[i,j] for j in M.j) for i in M.i)
M.obj=pyomo.Objective(rule=objRule)

# s.t.
# Assign all candidates to one clusters
def candToClusterRule(M,j):
return sum(M.z[i,j] for i in M.i) == 1
M.candToClusterCon = pyomo.Constraint(M.j, rule=candToClusterRule)

# create optimization problem
optprob = opt.SolverFactory(self.solver)
if self.solver =='gurobi':
optprob.set_options("Threads=" + str(self.threads) +
" TimeLimit=" + str(self.timelimit))
# Predefine the number of clusters
def noClustersRule(M):
return sum(M.z[i,i] for i in M.i) == M.no_k
M.noClustersCon = pyomo.Constraint(rule=noClustersRule)

results = optprob.solve(M,tee=False)
# check that it does not fail
if self.solver=='gurobi' and results['Solver'][0]['Termination condition'].index == 11:
print(results['Solver'][0]['Termination message'])
return False
elif self.solver=='gurobi' and not results['Solver'][0]['Termination condition'].index in [2,7,8,9,10]: # optimal
raise ValueError(results['Solver'][0]['Termination message'])
# Describe the choice of a candidate to a cluster
def clusterRelationRule(M,i,j):
return M.z[i,j] <= M.z[i,i]
M.clusterRelationCon = pyomo.Constraint(M.i,M.j, rule=clusterRelationRule)
return M

# Get results
r_x = np.array([[round(M.z[i,j].value) for i in range(length)]
for j in range(length)])
def _solve_given_pyomo_model(M, solver="glpk"):
"""Solves a given pyomo model clustering model an returns the clusters
r_y = np.array([round(M.y[j].value) for j in range(length)])
Args:
M (pyomo.ConcreteModel): Concrete model instance that gets solved.
solver (str, optional): solver, defines the solver for the pyomo model. Defaults to "glpk".
r_obj = pyomo.value(M.obj)
Raises:
ValueError: [description]
Returns:
[type]: [description]
"""
# create optimization problem
optprob = opt.SolverFactory(solver)
results = optprob.solve(M,tee=False)
# check that it does not fail

# Get results
r_x = np.array([[round(M.z[i,j].value) for i in M.i]
for j in M.j])

r_y = np.array([round(M.z[j,j].value) for j in M.j])

r_obj = pyomo.value(M.obj)

return (r_x, r_y, r_obj)

return (r_y, r_x.T, r_obj)

0 comments on commit 2afce0e

Please sign in to comment.