Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

This commit is intended to make the SVD code faster. In the initial e… #1064

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
52 changes: 32 additions & 20 deletions graspologic/embed/mase.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
# Licensed under the MIT License.

from typing import Optional

import numpy as np

from ..utils import is_almost_symmetric
from .base import BaseEmbedMulti
from .svd import SvdAlgorithmType, select_dimension, select_svd
from .svd import select_dimension, select_svd

from joblib import delayed, Parallel


class MultipleASE(BaseEmbedMulti):
Expand Down Expand Up @@ -59,6 +60,13 @@ class MultipleASE(BaseEmbedMulti):
'truncated'. The default is larger than the default in randomized_svd
to handle sparse matrices that may have large slowly decaying spectrum.

n_jobs : int, optional (default=-1)
The maximum number of concurrently running jobs, such as the number of
Python worker processes when backend="multiprocessing" or the size of
the thread-pool when backend="threading". If -1, all CPUs are used. If
1 is given, no parallel computing code is used at all, which is
useful for debugging.

scaled : bool, optional (default=True)
Whether to scale individual eigenvectors with eigenvalues in first embedding
stage.
Expand Down Expand Up @@ -111,14 +119,16 @@ class MultipleASE(BaseEmbedMulti):

def __init__(
self,
n_components: Optional[int] = None,
n_elbows: Optional[int] = 2,
algorithm: SvdAlgorithmType = "randomized",
n_iter: int = 5,
scaled: bool = True,
diag_aug: bool = True,
concat: bool = False,
n_components=None,
n_elbows=2,
algorithm="randomized",
n_iter=5,
scaled=True,
diag_aug=True,
concat=False,
n_jobs=-1,
svd_seed: Optional[int] = None,

):
if not isinstance(scaled, bool):
msg = "scaled must be a boolean, not {}".format(scaled)
Expand All @@ -134,25 +144,27 @@ def __init__(
svd_seed=svd_seed,
)
self.scaled = scaled
self.n_jobs = n_jobs

def _reduce_dim(self, graphs): # type: ignore
def _reduce_dim(self, graphs):
if self.n_components is None:
# first embed into log2(n_vertices) for each graph
n_components = int(np.ceil(np.log2(np.min(self.n_vertices_))))
else:
n_components = self.n_components

# embed individual graphs
embeddings = [
select_svd(

embeddings = Parallel(n_jobs=self.n_jobs)(
delayed(selectSVD)(
graph,
n_components=n_components,
algorithm=self.algorithm,
n_iter=self.n_iter,
svd_seed=self.svd_seed,
)
for graph in graphs
]
)
Us, Ds, Vs = zip(*embeddings)

# Choose the best embedding dimension for each graph
Expand Down Expand Up @@ -206,15 +218,15 @@ def _reduce_dim(self, graphs): # type: ignore
)
return Uhat, Vhat, sing_vals_left, sing_vals_right

def fit(self, graphs, y=None): # type: ignore
def fit(self, graphs, y=None):
"""
Fit the model with graphs.

Parameters
----------
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_array
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_matrix
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray or csr_array, each array must have shape (n_vertices, n_vertices).
If list of ndarray or csr_matrix, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).

Returns
Expand Down Expand Up @@ -245,16 +257,16 @@ def fit(self, graphs, y=None): # type: ignore

return self

def fit_transform(self, graphs, y=None): # type: ignore
def fit_transform(self, graphs, y=None):
"""
Fit the model with graphs and apply the embedding on graphs.
n_components is either automatically determined or based on user input.

Parameters
----------
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_array
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_matrix
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray or csr_array, each array must have shape (n_vertices, n_vertices).
If list of ndarray or csr_matrix, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).

Returns
Expand All @@ -265,4 +277,4 @@ def fit_transform(self, graphs, y=None): # type: ignore
The first corresponds to the left latent positions, and the second to the right latent positions.
When ``concat`` is True left and right (out and in) latent positions are concatenated along axis 1.
"""
return self._fit_transform(graphs)
return self._fit_transform(graphs)