Skip to content

Commit

Permalink
Fixed NetworkX 3 compatibility and switched to sparse arrays (not mat…
Browse files Browse the repository at this point in the history
…rices) (#1018)

* Update setup.cfg

* add conditional types logic for pipeline

* fix a reference to sparse matrix

* remove all reference to csr_matrix

* fix array

* remove nx.testing

* fix graphs equal

* fix some weird reversions to csr_matrix

* remove OrderedGraph support

* fix tutorial reference to csr_array

---------

Co-authored-by: hugwuoke <85888975+hugwuoke@users.noreply.github.com>
  • Loading branch information
bdpedigo and hugwuoke committed Mar 27, 2023
1 parent aebdf1e commit 13d0d46
Show file tree
Hide file tree
Showing 29 changed files with 183 additions and 159 deletions.
10 changes: 6 additions & 4 deletions docs/tutorials/models/edge_swaps.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
"from graspologic.plot import heatmap\n",
"from graspologic.utils import binarize, symmetrize\n",
"import networkx as nx\n",
"from scipy.sparse import csr_matrix"
"from scipy.sparse import csr_array"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"`EdgeSwapper` is a class that performs degree preserving edge swaps on networks. The distributions of graphs with a fixed degree sequence are known as configuration models, and these have extensive application for analyzing network datasets. The current implementation works on simple graphs (unewighted, no loops) that are of type `np.ndarray` or `csr_matrix`."
"`EdgeSwapper` is a class that performs degree preserving edge swaps on networks. The distributions of graphs with a fixed degree sequence are known as configuration models, and these have extensive application for analyzing network datasets. The current implementation works on simple graphs (unweighted, no loops) that are of type `np.ndarray` or `csr_array`."
]
},
{
Expand Down Expand Up @@ -115,10 +116,11 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"`EdgeSwapper` also works with `csr_matrix` adjacency representations. "
"`EdgeSwapper` also works with `csr_array` adjacency representations. "
]
},
{
Expand All @@ -127,7 +129,7 @@
"metadata": {},
"outputs": [],
"source": [
"swapper = EdgeSwapper(csr_matrix(adj), seed=8888)\n",
"swapper = EdgeSwapper(csr_array(adj), seed=8888)\n",
"swapped_adj, _ = swapper.swap_edges(n_swaps=1000)\n",
"g = nx.from_numpy_array(adj)\n",
"swapped_g = nx.from_numpy_array(swapped_adj)\n",
Expand Down
4 changes: 2 additions & 2 deletions graspologic/embed/ase.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class AdjacencySpectralEmbed(BaseSpectralEmbed):
:func:`sklearn.utils.extmath.randomized_svd`
- 'full'
Computes full svd using :func:`scipy.linalg.svd`
Does not support ``graph`` input of type scipy.sparse.csr_matrix
Does not support ``graph`` input of type scipy.sparse.csr_array
- 'truncated'
Computes truncated svd using :func:`scipy.sparse.linalg.svds`
Expand Down Expand Up @@ -149,7 +149,7 @@ def fit(
Parameters
----------
graph : array-like, scipy.sparse.csr_matrix, or networkx.Graph
graph : array-like, scipy.sparse.csr_array, or networkx.Graph
Input graph to embed.
y: Ignored
Expand Down
2 changes: 1 addition & 1 deletion graspologic/embed/lse.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def fit(
Parameters
----------
graph : array-like, scipy.sparse.csr_matrix, or networkx.Graph
graph : array-like, scipy.sparse.csr_array, or networkx.Graph
Input graph to embed. see graspologic.utils.import_graph
Returns
Expand Down
8 changes: 4 additions & 4 deletions graspologic/embed/mase.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,9 @@ def fit(self, graphs, y=None): # type: ignore
Parameters
----------
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_matrix
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_array
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray or csr_matrix, each array must have shape (n_vertices, n_vertices).
If list of ndarray or csr_array, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
Returns
Expand Down Expand Up @@ -252,9 +252,9 @@ def fit_transform(self, graphs, y=None): # type: ignore
Parameters
----------
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_matrix
graphs : list of nx.Graph, ndarray or scipy.sparse.csr_array
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray or csr_matrix, each array must have shape (n_vertices, n_vertices).
If list of ndarray or csr_array, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
Returns
Expand Down
10 changes: 5 additions & 5 deletions graspologic/embed/omni.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np
from beartype import beartype
from scipy.sparse import csr_matrix, hstack, isspmatrix_csr, vstack
from scipy.sparse import csr_array, hstack, isspmatrix_csr, vstack

from graspologic.types import List

Expand All @@ -17,7 +17,7 @@


@beartype
def _get_omnibus_matrix_sparse(matrices: List[csr_matrix]) -> csr_matrix:
def _get_omnibus_matrix_sparse(matrices: List[csr_array]) -> csr_array:
"""
Generate the omnibus matrix from a list of sparse adjacency matrices as described by 'A central limit theorem
for an omnibus embedding of random dot product graphs.'
Expand Down Expand Up @@ -52,7 +52,7 @@ def _get_omnibus_matrix_sparse(matrices: List[csr_matrix]) -> csr_matrix:
# row
rows.append(hstack(current_row))

return vstack(rows, format="csr")
return csr_array(vstack(rows, format="csr"))


def _get_laplacian_matrices(
Expand Down Expand Up @@ -97,7 +97,7 @@ def _get_omni_matrix(
out : 2d-array
Array of shape (n_vertices * n_graphs, n_vertices * n_graphs)
"""
if isspmatrix_csr(graphs[0]):
if isinstance(graphs[0], csr_array):
return _get_omnibus_matrix_sparse(graphs) # type: ignore

shape = graphs[0].shape
Expand Down Expand Up @@ -244,7 +244,7 @@ def fit(self, graphs, y=None): # type: ignore
Parameters
----------
graphs : list of nx.Graph or ndarray, or csr_matrix
graphs : list of nx.Graph or ndarray, or csr_array
If list of nx.Graph, each Graph must contain same number of nodes.
If list of ndarray, each array must have shape (n_vertices, n_vertices).
If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
Expand Down
10 changes: 5 additions & 5 deletions graspologic/embed/svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _compute_likelihood(arr: np.ndarray) -> np.ndarray:


def select_dimension(
X: Union[np.ndarray, sp.csr_matrix],
X: Union[np.ndarray, sp.csr_array],
n_components: Optional[int] = None,
n_elbows: int = 2,
threshold: Optional[float] = None,
Expand Down Expand Up @@ -108,7 +108,7 @@ def select_dimension(
"""
# Handle input data
if not isinstance(X, np.ndarray) and not sp.isspmatrix_csr(X):
msg = "X must be a numpy array or scipy.sparse.csr_matrix, not {}.".format(
msg = "X must be a numpy array or scipy.sparse.csr_array, not {}.".format(
type(X)
)
raise ValueError(msg)
Expand Down Expand Up @@ -185,7 +185,7 @@ def select_dimension(


def select_svd(
X: Union[np.ndarray, sp.csr_matrix],
X: Union[np.ndarray, sp.csr_array],
n_components: Optional[int] = None,
n_elbows: Optional[int] = 2,
algorithm: SvdAlgorithmType = "randomized",
Expand Down Expand Up @@ -223,7 +223,7 @@ def select_svd(
:func:`sklearn.utils.extmath.randomized_svd`
- 'full'
Computes full svd using :func:`scipy.linalg.svd`
Does not support ``graph`` input of type scipy.sparse.csr_matrix
Does not support ``graph`` input of type scipy.sparse.csr_array
- 'truncated'
Computes truncated svd using :func:`scipy.sparse.linalg.svds`
- 'eigsh'
Expand Down Expand Up @@ -266,7 +266,7 @@ def select_svd(
raise ValueError(msg)

if algorithm == "full" and sp.isspmatrix_csr(X):
msg = "'full' agorithm does not support scipy.sparse.csr_matrix inputs."
msg = "'full' agorithm does not support scipy.sparse.csr_array inputs."
raise TypeError(msg)

if n_components is None:
Expand Down
10 changes: 5 additions & 5 deletions graspologic/match/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from beartype import beartype
from ot import sinkhorn
from scipy.optimize import linear_sum_assignment
from scipy.sparse import csr_matrix
from scipy.sparse import csr_array
from sklearn.utils import check_scalar

from graspologic.types import List, RngType, Tuple
Expand Down Expand Up @@ -492,7 +492,7 @@ def _check_input_matrix(
) -> MultilayerAdjacency:
if isinstance(A, np.ndarray) and (np.ndim(A) == 2):
A = [A]
elif isinstance(A, (csr_matrix, csr_array)):
elif isinstance(A, (csr_array, csr_array)):
A = [A]
elif isinstance(A, list):
# iterate over to make sure they're all same shape
Expand All @@ -507,7 +507,7 @@ def _check_input_matrix(
)
if isinstance(A[0], np.ndarray):
A = np.array(A, dtype=float)
elif isinstance(A[0], csr_matrix):
elif isinstance(A[0], csr_array):
pass
if (n_layers is not None) and (len(A) != n_layers):
msg = (
Expand Down Expand Up @@ -649,7 +649,7 @@ def _multilayer_adj_pad(


def _adj_pad(matrix: AdjacencyMatrix, n_padded: Int, method: PaddingType) -> np.ndarray:
if isinstance(matrix, (csr_matrix, csr_array)) and (method == "adopted"):
if isinstance(matrix, (csr_array, csr_array)) and (method == "adopted"):
msg = (
"Using adopted padding method with a sparse adjacency representation; this "
"will convert the matrix to a dense representation and likely remove any "
Expand All @@ -661,7 +661,7 @@ def _adj_pad(matrix: AdjacencyMatrix, n_padded: Int, method: PaddingType) -> np.
if method == "adopted":
matrix = 2 * matrix - np.ones(matrix.shape)

if (method == "naive") and isinstance(matrix, (csr_matrix, csr_array)):
if (method == "naive") and isinstance(matrix, (csr_array, csr_array)):
matrix_padded = csr_array((n_padded, n_padded))
else:
matrix_padded = np.zeros((n_padded, n_padded))
Expand Down
6 changes: 3 additions & 3 deletions graspologic/match/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@
import numpy as np
from packaging import version
from scipy import __version__ as scipy_version
from scipy.sparse import csr_matrix
from scipy.sparse import csr_array

if version.parse(scipy_version) >= version.parse("1.8.0"):
from scipy.sparse import csr_array
else:
csr_array = csr_matrix
csr_array = csr_array

from typing_extensions import Literal

from graspologic.types import List, Tuple

# redefining since I don't want to add csr_array for ALL code in graspologic yet
AdjacencyMatrix = Union[np.ndarray, csr_matrix, csr_array]
AdjacencyMatrix = Union[np.ndarray, csr_array, csr_array]

MultilayerAdjacency = Union[List[AdjacencyMatrix], AdjacencyMatrix, np.ndarray]

Expand Down
10 changes: 5 additions & 5 deletions graspologic/match/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,25 +83,25 @@ def graph_match(
Parameters
----------
A : {ndarray, csr_matrix, csr_array} of shape (n, n), or a list thereof
A : {ndarray, csr_array} of shape (n, n), or a list thereof
The first (potentially multilayer) adjacency matrix to be matched. Multiplex
networks (e.g. a network with multiple edge types) can be used by inputting a
list of the adjacency matrices for each edge type.
B : {ndarray, csr_matrix, csr_array} of shape (m, m), or a list thereof
B : {ndarray, csr_array} of shape (m, m), or a list thereof
The second (potentially multilayer) adjacency matrix to be matched. Must have
the same number of layers as ``A``, but need not have the same size
(see ``padding``).
AB : {ndarray, csr_matrix, csr_array} of shape (n, m), or a list thereof, default=None
AB : {ndarray, csr_array} of shape (n, m), or a list thereof, default=None
A (potentially multilayer) matrix representing connections from the objects
indexed in ``A`` to those in ``B``, used for bisected graph matching (see [2]).
BA : {ndarray, csr_matrix, csr_array} of shape (m, n), or a list thereof, default=None
BA : {ndarray, csr_array} of shape (m, n), or a list thereof, default=None
A (potentially multilayer) matrix representing connections from the objects
indexed in ``B`` to those in ``A``, used for bisected graph matching (see [2]).
S : {ndarray, csr_matrix, csr_array} of shape (n, m), default=None
S : {ndarray, csr_array} of shape (n, m), default=None
A matrix representing the similarity of objects indexed in ``A`` to each object
indexed in ``B``. Note that the scale (i.e. the norm) of this matrix will affect
how strongly the similarity (linear) term is weighted relative to the adjacency
Expand Down
14 changes: 7 additions & 7 deletions graspologic/models/edge_swaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numba as nb
import numpy as np
from beartype import beartype
from scipy.sparse import csr_matrix, lil_matrix
from scipy.sparse import csr_array, lil_matrix
from sklearn.utils import check_scalar

from graspologic.preconditions import check_argument
Expand All @@ -21,7 +21,7 @@ class EdgeSwapper:
Attributes
----------
adjacency : np.ndarray OR csr_matrix, shape (n_verts, n_verts)
adjacency : np.ndarray OR csr_array, shape (n_verts, n_verts)
The initial adjacency matrix to perform edge swaps on. Must be unweighted and undirected.
edge_list : np.ndarray, shape (n_verts, 2)
Expand Down Expand Up @@ -65,7 +65,7 @@ def __init__(self, adjacency: AdjacencyMatrix, seed: Optional[int] = None):

adjacency = import_graph(adjacency, copy=True)

if isinstance(adjacency, csr_matrix):
if isinstance(adjacency, csr_array):
# more efficient for manipulations which change sparsity structure
adjacency = lil_matrix(adjacency)
self._edge_swap_function = _edge_swap
Expand Down Expand Up @@ -128,7 +128,7 @@ def swap_edges(self, n_swaps: int = 1) -> Tuple[AdjacencyMatrix, np.ndarray]:

adjacency = self.adjacency
if isinstance(adjacency, lil_matrix):
adjacency = csr_matrix(adjacency)
adjacency = csr_array(adjacency)
else:
adjacency = adjacency.copy()

Expand All @@ -141,11 +141,11 @@ def _edge_swap(
"""
Performs the edge swap on the adjacency matrix. If adjacency is
np.ndarray, then nopython=True is used in numba, but if adjacency
is csr_matrix, then forceobj=True is used in numba
is csr_array, then forceobj=True is used in numba
Parameters
----------
adjacency : np.ndarray OR csr_matrix, shape (n_verts, n_verts)
adjacency : np.ndarray OR csr_array, shape (n_verts, n_verts)
The initial adjacency matrix in which edge swaps are performed on it
edge_list : np.ndarray, shape (n_verts, 2)
Expand All @@ -156,7 +156,7 @@ def _edge_swap(
Returns
-------
adjacency : np.ndarray OR csr_matrix, shape (n_verts, n_verts)
adjacency : np.ndarray OR csr_array, shape (n_verts, n_verts)
The adjacency matrix after an edge swap is performed on the graph
edge_list : np.ndarray (n_verts, 2)
Expand Down
8 changes: 4 additions & 4 deletions graspologic/partition/leiden.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def _adjacency_matrix_to_edge_list(
shape = matrix.shape
if len(shape) != 2 or shape[0] != shape[1]:
raise ValueError(
"graphs of type np.ndarray or csr.sparse.csr.csr_matrix should be "
"graphs of type np.ndarray or csr.sparse.csr.csr_array should be "
"adjacency matrices with n x n shape"
)

Expand Down Expand Up @@ -205,7 +205,7 @@ def leiden(
graph : Union[List[Tuple[Any, Any, Union[int, float]]], GraphRepresentation]
A graph representation, whether a weighted edge list referencing an undirected
graph, an undirected networkx graph, or an undirected adjacency matrix in either
numpy.ndarray or scipy.sparse.csr.csr_matrix form. Please see the Notes section
numpy.ndarray or scipy.sparse.csr_array form. Please see the Notes section
regarding node ids used.
starting_communities : Optional[Dict[Any, int]]
Default is ``None``. An optional community mapping dictionary that contains a node
Expand Down Expand Up @@ -419,7 +419,7 @@ def hierarchical_leiden(
List[Tuple[Any, Any, Union[int, float]]],
nx.Graph,
np.ndarray,
scipy.sparse.csr_matrix,
scipy.sparse.csr_array,
],
max_cluster_size: int = 1000,
starting_communities: Optional[Dict[str, int]] = None,
Expand Down Expand Up @@ -466,7 +466,7 @@ def hierarchical_leiden(
graph : Union[List[Tuple[Any, Any, Union[int, float]]], GraphRepresentation]
A graph representation, whether a weighted edge list referencing an undirected
graph, an undirected networkx graph, or an undirected adjacency matrix in either
numpy.ndarray or scipy.sparse.csr.csr_matrix form. Please see the Notes section
numpy.ndarray or scipy.sparse.csr_array form. Please see the Notes section
regarding node ids used.
max_cluster_size : int
Default is ``1000``. Any partition or cluster with
Expand Down
5 changes: 5 additions & 0 deletions graspologic/pipeline/embed/_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from typing import Union

import networkx as nx

# Type alias for the networkx graph classes named here: undirected
# (nx.Graph) and directed (nx.DiGraph).
NxGraphType = Union[nx.Graph, nx.DiGraph]

0 comments on commit 13d0d46

Please sign in to comment.