Fixed NetworkX 3 compatibility and switched to sparse arrays (not matrices) (#1018)

* Update setup.cfg
* add conditional types logic for pipeline
* fix a reference to sparse matrix
* remove all reference to csr_matrix
* fix array
* remove nx.testing
* fix graphs equal
* fix some weird reversions to csr_matrix
* remove OrderedGraph support
* fix tutorial reference to csr_array

---------

Co-authored-by: hugwuoke <85888975+hugwuoke@users.noreply.github.com>
graspologic-org · Mar 27, 2023 · 13d0d46 · 13d0d46
1 parent aebdf1e
commit 13d0d46
Show file tree

Hide file tree

Showing 29 changed files with 183 additions and 159 deletions.
diff --git a/docs/tutorials/models/edge_swaps.ipynb b/docs/tutorials/models/edge_swaps.ipynb
@@ -18,14 +18,15 @@
     "from graspologic.plot import heatmap\n",
     "from graspologic.utils import binarize, symmetrize\n",
     "import networkx as nx\n",
-    "from scipy.sparse import csr_matrix"
+    "from scipy.sparse import csr_array"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "`EdgeSwapper` is a class that performs degree preserving edge swaps on networks. The distributions of graphs with a fixed degree sequence are known as configuration models, and these have extensive application for analyzing network datasets. The current implementation works on simple graphs (unewighted, no loops) that are of type `np.ndarray` or `csr_matrix`."
+    "`EdgeSwapper` is a class that performs degree preserving edge swaps on networks. The distributions of graphs with a fixed degree sequence are known as configuration models, and these have extensive application for analyzing network datasets. The current implementation works on simple graphs (unweighted, no loops) that are of type `np.ndarray` or `csr_array`."
    ]
   },
   {
@@ -115,10 +116,11 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "`EdgeSwapper` also works with `csr_matrix` adjacency representations. "
+    "`EdgeSwapper` also works with `csr_array` adjacency representations. "
    ]
   },
   {
@@ -127,7 +129,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "swapper = EdgeSwapper(csr_matrix(adj), seed=8888)\n",
+    "swapper = EdgeSwapper(csr_array(adj), seed=8888)\n",
     "swapped_adj, _ = swapper.swap_edges(n_swaps=1000)\n",
     "g = nx.from_numpy_array(adj)\n",
     "swapped_g = nx.from_numpy_array(swapped_adj)\n",

diff --git a/graspologic/embed/ase.py b/graspologic/embed/ase.py
@@ -40,7 +40,7 @@ class AdjacencySpectralEmbed(BaseSpectralEmbed):
             :func:`sklearn.utils.extmath.randomized_svd`
         - 'full'
             Computes full svd using :func:`scipy.linalg.svd`
-            Does not support ``graph`` input of type scipy.sparse.csr_matrix
+            Does not support ``graph`` input of type scipy.sparse.csr_array
         - 'truncated'
             Computes truncated svd using :func:`scipy.sparse.linalg.svds`
 
@@ -149,7 +149,7 @@ def fit(
 
         Parameters
         ----------
-        graph : array-like, scipy.sparse.csr_matrix, or networkx.Graph
+        graph : array-like, scipy.sparse.csr_array, or networkx.Graph
             Input graph to embed.
 
         y: Ignored

diff --git a/graspologic/embed/lse.py b/graspologic/embed/lse.py
@@ -159,7 +159,7 @@ def fit(
 
         Parameters
         ----------
-        graph : array-like, scipy.sparse.csr_matrix, or networkx.Graph
+        graph : array-like, scipy.sparse.csr_array, or networkx.Graph
             Input graph to embed. see graspologic.utils.import_graph
 
         Returns

diff --git a/graspologic/embed/mase.py b/graspologic/embed/mase.py
@@ -212,9 +212,9 @@ def fit(self, graphs, y=None):  # type: ignore
 
         Parameters
         ----------
-        graphs : list of nx.Graph, ndarray or scipy.sparse.csr_matrix
+        graphs : list of nx.Graph, ndarray or scipy.sparse.csr_array
             If list of nx.Graph, each Graph must contain same number of nodes.
-            If list of ndarray or csr_matrix, each array must have shape (n_vertices, n_vertices).
+            If list of ndarray or csr_array, each array must have shape (n_vertices, n_vertices).
             If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
 
         Returns
@@ -252,9 +252,9 @@ def fit_transform(self, graphs, y=None):  # type: ignore
 
         Parameters
         ----------
-        graphs : list of nx.Graph, ndarray or scipy.sparse.csr_matrix
+        graphs : list of nx.Graph, ndarray or scipy.sparse.csr_array
             If list of nx.Graph, each Graph must contain same number of nodes.
-            If list of ndarray or csr_matrix, each array must have shape (n_vertices, n_vertices).
+            If list of ndarray or csr_array, each array must have shape (n_vertices, n_vertices).
             If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
 
         Returns

diff --git a/graspologic/embed/omni.py b/graspologic/embed/omni.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 from beartype import beartype
-from scipy.sparse import csr_matrix, hstack, isspmatrix_csr, vstack
+from scipy.sparse import csr_array, hstack, isspmatrix_csr, vstack
 
 from graspologic.types import List
 
@@ -17,7 +17,7 @@
 
 
 @beartype
-def _get_omnibus_matrix_sparse(matrices: List[csr_matrix]) -> csr_matrix:
+def _get_omnibus_matrix_sparse(matrices: List[csr_array]) -> csr_array:
     """
     Generate the omnibus matrix from a list of sparse adjacency matrices as described by 'A central limit theorem
     for an omnibus embedding of random dot product graphs.'
@@ -52,7 +52,7 @@ def _get_omnibus_matrix_sparse(matrices: List[csr_matrix]) -> csr_matrix:
         # row
         rows.append(hstack(current_row))
 
-    return vstack(rows, format="csr")
+    return csr_array(vstack(rows, format="csr"))
 
 
 def _get_laplacian_matrices(
@@ -97,7 +97,7 @@ def _get_omni_matrix(
     out : 2d-array
         Array of shape (n_vertices * n_graphs, n_vertices * n_graphs)
     """
-    if isspmatrix_csr(graphs[0]):
+    if isinstance(graphs[0], csr_array):
         return _get_omnibus_matrix_sparse(graphs)  # type: ignore
 
     shape = graphs[0].shape
@@ -244,7 +244,7 @@ def fit(self, graphs, y=None):  # type: ignore
 
         Parameters
         ----------
-        graphs : list of nx.Graph or ndarray, or csr_matrix
+        graphs : list of nx.Graph or ndarray, or csr_array
             If list of nx.Graph, each Graph must contain same number of nodes.
             If list of ndarray, each array must have shape (n_vertices, n_vertices).
             If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).

diff --git a/graspologic/embed/svd.py b/graspologic/embed/svd.py
@@ -58,7 +58,7 @@ def _compute_likelihood(arr: np.ndarray) -> np.ndarray:
 
 
 def select_dimension(
-    X: Union[np.ndarray, sp.csr_matrix],
+    X: Union[np.ndarray, sp.csr_array],
     n_components: Optional[int] = None,
     n_elbows: int = 2,
     threshold: Optional[float] = None,
@@ -108,7 +108,7 @@ def select_dimension(
     """
     # Handle input data
     if not isinstance(X, np.ndarray) and not sp.isspmatrix_csr(X):
-        msg = "X must be a numpy array or scipy.sparse.csr_matrix, not {}.".format(
+        msg = "X must be a numpy array or scipy.sparse.csr_array, not {}.".format(
             type(X)
         )
         raise ValueError(msg)
@@ -185,7 +185,7 @@ def select_dimension(
 
 
 def select_svd(
-    X: Union[np.ndarray, sp.csr_matrix],
+    X: Union[np.ndarray, sp.csr_array],
     n_components: Optional[int] = None,
     n_elbows: Optional[int] = 2,
     algorithm: SvdAlgorithmType = "randomized",
@@ -223,7 +223,7 @@ def select_svd(
             :func:`sklearn.utils.extmath.randomized_svd`
         - 'full'
             Computes full svd using :func:`scipy.linalg.svd`
-            Does not support ``graph`` input of type scipy.sparse.csr_matrix
+            Does not support ``graph`` input of type scipy.sparse.csr_array
         - 'truncated'
             Computes truncated svd using :func:`scipy.sparse.linalg.svds`
         - 'eigsh'
@@ -266,7 +266,7 @@ def select_svd(
         raise ValueError(msg)
 
     if algorithm == "full" and sp.isspmatrix_csr(X):
-        msg = "'full' agorithm does not support scipy.sparse.csr_matrix inputs."
+        msg = "'full' agorithm does not support scipy.sparse.csr_array inputs."
         raise TypeError(msg)
 
     if n_components is None:

diff --git a/graspologic/match/solver.py b/graspologic/match/solver.py
@@ -10,7 +10,7 @@
 from beartype import beartype
 from ot import sinkhorn
 from scipy.optimize import linear_sum_assignment
-from scipy.sparse import csr_matrix
+from scipy.sparse import csr_array
 from sklearn.utils import check_scalar
 
 from graspologic.types import List, RngType, Tuple
@@ -492,7 +492,7 @@ def _check_input_matrix(
 ) -> MultilayerAdjacency:
     if isinstance(A, np.ndarray) and (np.ndim(A) == 2):
         A = [A]
-    elif isinstance(A, (csr_matrix, csr_array)):
+    elif isinstance(A, (csr_array, csr_array)):
         A = [A]
     elif isinstance(A, list):
         # iterate over to make sure they're all same shape
@@ -507,7 +507,7 @@ def _check_input_matrix(
                 )
         if isinstance(A[0], np.ndarray):
             A = np.array(A, dtype=float)
-        elif isinstance(A[0], csr_matrix):
+        elif isinstance(A[0], csr_array):
             pass
     if (n_layers is not None) and (len(A) != n_layers):
         msg = (
@@ -649,7 +649,7 @@ def _multilayer_adj_pad(
 
 
 def _adj_pad(matrix: AdjacencyMatrix, n_padded: Int, method: PaddingType) -> np.ndarray:
-    if isinstance(matrix, (csr_matrix, csr_array)) and (method == "adopted"):
+    if isinstance(matrix, (csr_array, csr_array)) and (method == "adopted"):
         msg = (
             "Using adopted padding method with a sparse adjacency representation; this "
             "will convert the matrix to a dense representation and likely remove any "
@@ -661,7 +661,7 @@ def _adj_pad(matrix: AdjacencyMatrix, n_padded: Int, method: PaddingType) -> np.
     if method == "adopted":
         matrix = 2 * matrix - np.ones(matrix.shape)
 
-    if (method == "naive") and isinstance(matrix, (csr_matrix, csr_array)):
+    if (method == "naive") and isinstance(matrix, (csr_array, csr_array)):
         matrix_padded = csr_array((n_padded, n_padded))
     else:
         matrix_padded = np.zeros((n_padded, n_padded))

diff --git a/graspologic/match/types.py b/graspologic/match/types.py
@@ -6,19 +6,19 @@
 import numpy as np
 from packaging import version
 from scipy import __version__ as scipy_version
-from scipy.sparse import csr_matrix
+from scipy.sparse import csr_array
 
 if version.parse(scipy_version) >= version.parse("1.8.0"):
     from scipy.sparse import csr_array
 else:
-    csr_array = csr_matrix
+    csr_array = csr_array
 
 from typing_extensions import Literal
 
 from graspologic.types import List, Tuple
 
 # redefining since I don't want to add csr_array for ALL code in graspologic yet
-AdjacencyMatrix = Union[np.ndarray, csr_matrix, csr_array]
+AdjacencyMatrix = Union[np.ndarray, csr_array, csr_array]
 
 MultilayerAdjacency = Union[List[AdjacencyMatrix], AdjacencyMatrix, np.ndarray]
 

diff --git a/graspologic/match/wrappers.py b/graspologic/match/wrappers.py
@@ -83,25 +83,25 @@ def graph_match(
 
     Parameters
     ----------
-    A : {ndarray, csr_matrix, csr_array} of shape (n, n), or a list thereof
+    A : {ndarray, csr_array} of shape (n, n), or a list thereof
         The first (potentially multilayer) adjacency matrix to be matched. Multiplex
         networks (e.g. a network with multiple edge types) can be used by inputting a
         list of the adjacency matrices for each edge type.
 
-    B : {ndarray, csr_matrix, csr_array} of shape (m, m), or a list thereof
+    B : {ndarray, csr_array} of shape (m, m), or a list thereof
         The second (potentially multilayer) adjacency matrix to be matched. Must have
         the same number of layers as ``A``, but need not have the same size
         (see ``padding``).
 
-    AB : {ndarray, csr_matrix, csr_array} of shape (n, m), or a list thereof, default=None
+    AB : {ndarray, csr_array} of shape (n, m), or a list thereof, default=None
         A (potentially multilayer) matrix representing connections from the objects
         indexed in ``A`` to those in ``B``, used for bisected graph matching (see [2]).
 
-    BA : {ndarray, csr_matrix, csr_array} of shape (m, n), or a list thereof, default=None
+    BA : {ndarray, csr_array} of shape (m, n), or a list thereof, default=None
         A (potentially multilayer) matrix representing connections from the objects
         indexed in ``B`` to those in ``A``, used for bisected graph matching (see [2]).
 
-    S : {ndarray, csr_matrix, csr_array} of shape (n, m), default=None
+    S : {ndarray, csr_array} of shape (n, m), default=None
         A matrix representing the similarity of objects indexed in ``A`` to each object
         indexed in ``B``. Note that the scale (i.e. the norm) of this matrix will affect
         how strongly the similarity (linear) term is weighted relative to the adjacency

diff --git a/graspologic/models/edge_swaps.py b/graspologic/models/edge_swaps.py
@@ -3,7 +3,7 @@
 import numba as nb
 import numpy as np
 from beartype import beartype
-from scipy.sparse import csr_matrix, lil_matrix
+from scipy.sparse import csr_array, lil_matrix
 from sklearn.utils import check_scalar
 
 from graspologic.preconditions import check_argument
@@ -21,7 +21,7 @@ class EdgeSwapper:
 
     Attributes
     ----------
-    adjacency : np.ndarray OR csr_matrix, shape (n_verts, n_verts)
+    adjacency : np.ndarray OR csr_array, shape (n_verts, n_verts)
         The initial adjacency matrix to perform edge swaps on. Must be unweighted and undirected.
 
     edge_list : np.ndarray, shape (n_verts, 2)
@@ -65,7 +65,7 @@ def __init__(self, adjacency: AdjacencyMatrix, seed: Optional[int] = None):
 
         adjacency = import_graph(adjacency, copy=True)
 
-        if isinstance(adjacency, csr_matrix):
+        if isinstance(adjacency, csr_array):
             # more efficient for manipulations which change sparsity structure
             adjacency = lil_matrix(adjacency)
             self._edge_swap_function = _edge_swap
@@ -128,7 +128,7 @@ def swap_edges(self, n_swaps: int = 1) -> Tuple[AdjacencyMatrix, np.ndarray]:
 
         adjacency = self.adjacency
         if isinstance(adjacency, lil_matrix):
-            adjacency = csr_matrix(adjacency)
+            adjacency = csr_array(adjacency)
         else:
             adjacency = adjacency.copy()
 
@@ -141,11 +141,11 @@ def _edge_swap(
     """
     Performs the edge swap on the adjacency matrix. If adjacency is
     np.ndarray, then nopython=True is used in numba, but if adjacency
-    is csr_matrix, then forceobj=True is used in numba
+    is csr_array, then forceobj=True is used in numba
 
     Parameters
     ----------
-    adjacency : np.ndarray OR csr_matrix, shape (n_verts, n_verts)
+    adjacency : np.ndarray OR csr_array, shape (n_verts, n_verts)
         The initial adjacency matrix in which edge swaps are performed on it
 
     edge_list : np.ndarray, shape (n_verts, 2)
@@ -156,7 +156,7 @@ def _edge_swap(
 
     Returns
     -------
-    adjacency : np.ndarray OR csr_matrix, shape (n_verts, n_verts)
+    adjacency : np.ndarray OR csr_array, shape (n_verts, n_verts)
         The adjancency matrix after an edge swap is performed on the graph
 
     edge_list : np.ndarray (n_verts, 2)

diff --git a/graspologic/partition/leiden.py b/graspologic/partition/leiden.py
@@ -81,7 +81,7 @@ def _adjacency_matrix_to_edge_list(
     shape = matrix.shape
     if len(shape) != 2 or shape[0] != shape[1]:
         raise ValueError(
-            "graphs of type np.ndarray or csr.sparse.csr.csr_matrix should be "
+            "graphs of type np.ndarray or csr.sparse.csr.csr_array should be "
             "adjacency matrices with n x n shape"
         )
 
@@ -205,7 +205,7 @@ def leiden(
     graph : Union[List[Tuple[Any, Any, Union[int, float]]], GraphRepresentation]
         A graph representation, whether a weighted edge list referencing an undirected
         graph, an undirected networkx graph, or an undirected adjacency matrix in either
-        numpy.ndarray or scipy.sparse.csr.csr_matrix form. Please see the Notes section
+        numpy.ndarray or scipy.sparse.csr_array form. Please see the Notes section
         regarding node ids used.
     starting_communities : Optional[Dict[Any, int]]
         Default is ``None``. An optional community mapping dictionary that contains a node
@@ -419,7 +419,7 @@ def hierarchical_leiden(
         List[Tuple[Any, Any, Union[int, float]]],
         nx.Graph,
         np.ndarray,
-        scipy.sparse.csr_matrix,
+        scipy.sparse.csr_array,
     ],
     max_cluster_size: int = 1000,
     starting_communities: Optional[Dict[str, int]] = None,
@@ -466,7 +466,7 @@ def hierarchical_leiden(
     graph : Union[List[Tuple[Any, Any, Union[int, float]]], GraphRepresentation]
         A graph representation, whether a weighted edge list referencing an undirected
         graph, an undirected networkx graph, or an undirected adjacency matrix in either
-        numpy.ndarray or scipy.sparse.csr.csr_matrix form. Please see the Notes section
+        numpy.ndarray or scipy.sparse.csr_array form. Please see the Notes section
         regarding node ids used.
     max_cluster_size : int
         Default is ``1000``. Any partition or cluster with

diff --git a/graspologic/pipeline/embed/_types.py b/graspologic/pipeline/embed/_types.py
@@ -0,0 +1,5 @@
+from typing import Union
+
+import networkx as nx
+
+NxGraphType = Union[nx.Graph, nx.DiGraph]