Changes to doc website (#326)

* Change requirements * Attributes as list instead of table * White space removal, consistency of docs across files [skip travis] * Try newer numpydoc for list instead of tables of attributes [skip travis] * Attempt #2 * Unfreeze numpydoc requirement [skip travis] * More whitespace cleanup, make docs more like sklearn, etc [skip travis] * Refreeze numpydoc version
graspologic-org · Mar 2, 2020 · 10598ca · 10598ca
1 parent 801e48d
commit 10598ca
Show file tree

Hide file tree

Showing 17 changed files with 233 additions and 244 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -54,6 +54,8 @@
 # -- numpydoc
 # Below is needed to prevent errors
 numpydoc_show_class_members = False
+numpydoc_attributes_as_param_list = True
+numpydoc_use_blockquotes = True
 
 # -- sphinx.ext.autosummary
 autosummary_generate = True

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,7 +1,7 @@
 sphinx==1.8.5
 sphinx_rtd_theme>=0.4.2
 sphinxcontrib-rawfiles
-numpydoc
-nbsphinx
-IPython
-ipykernel
+nbsphinx==0.4.2
+ipython==7.4.0
+ipykernel==5.1.0
+numpydoc==0.9.2
diff --git a/graspy/cluster/autogmm.py b/graspy/cluster/autogmm.py
@@ -36,10 +36,9 @@ class AutoGMMCluster(BaseCluster):
     cluster numbers are used and the clustering with the best selection
     criterion (bic/aic) is chosen.
 
-
     Parameters
     ----------
-    min_components : int, default=2. 
+    min_components : int, default=2.
         The minimum number of mixture components to consider (unless
         max_components=None, in which case this is the maximum number of
         components to consider). If max_components is not None, min_components
@@ -48,7 +47,7 @@ class AutoGMMCluster(BaseCluster):
         in label_init.
 
     max_components : int or None, default=10.
-        The maximum number of mixture components to consider. Must be greater 
+        The maximum number of mixture components to consider. Must be greater
         than or equal to min_components.
         If label_init is given, min_components must match number of unique labels
         in label_init.
@@ -88,11 +87,11 @@ class AutoGMMCluster(BaseCluster):
 
         If a list/array, it must be a list/array of strings containing only
         'ward', 'complete', 'average', and/or 'single'.
-        
+
     covariance_type : {'full', 'tied', 'diag', 'spherical', 'all' (default)} , optional
         String or list/array describing the type of covariance parameters to use.
         If a string, it must be one of:
-        
+
         - 'full'
             each component has its own general covariance matrix
         - 'tied'
@@ -106,7 +105,7 @@ class AutoGMMCluster(BaseCluster):
 
         If a list/array, it must be a list/array of strings containing only
         'full', 'tied', 'diag', and/or 'spherical'.
-    
+
     random_state : int, RandomState instance or None, optional (default=None)
         There is randomness in k-means initialization of 
         :class:`sklearn.mixture.GaussianMixture`. This parameter is passed to 
@@ -123,7 +122,7 @@ class AutoGMMCluster(BaseCluster):
 
     max_iter : int, optional (default = 100).
         The maximum number of EM iterations to perform.
-    
+
     selection_criteria : str {"bic" or "aic"}, optional, (default="bic")
         select the best model based on Bayesian Information Criterion (bic) or 
         Akaike Information Criterion (aic)
@@ -150,23 +149,24 @@ class AutoGMMCluster(BaseCluster):
     results_ : pandas.DataFrame
         Contains exhaustive information about all the clustering runs.
         Columns are:
-            'model' : GaussianMixture object
-                GMM clustering fit to the data
-            'bic/aic' : float
-                Bayesian Information Criterion
-            'ari' : float or nan
-                Adjusted Rand Index between GMM classification, and true classification,
-                nan if y is not given
-            'n_components' : int
-                number of clusters
-            'affinity' : {'euclidean','manhattan','cosine','none'}
-                affinity used in Agglomerative Clustering
-            'linkage' : {'ward','complete','average','single'}
-                linkage used in Agglomerative Clustering
-            'covariance_type' : {'full', 'tied', 'diag', 'spherical'}
-                covariance type used in GMM
-            'reg_covar' : float
-                regularization used in GMM
+
+        'model' : GaussianMixture object
+            GMM clustering fit to the data
+        'bic/aic' : float
+            Bayesian Information Criterion
+        'ari' : float or nan
+            Adjusted Rand Index between GMM classification, and true classification,
+            nan if y is not given
+        'n_components' : int
+            number of clusters
+        'affinity' : {'euclidean','manhattan','cosine','none'}
+            affinity used in Agglomerative Clustering
+        'linkage' : {'ward','complete','average','single'}
+            linkage used in Agglomerative Clustering
+        'covariance_type' : {'full', 'tied', 'diag', 'spherical'}
+            covariance type used in GMM
+        'reg_covar' : float
+            regularization used in GMM
 
     criter_ : the best (lowest) Bayesian Information Criterion
 
@@ -179,10 +179,10 @@ class AutoGMMCluster(BaseCluster):
     affinity_ : str
         affinity used in the model with the best bic/aic
 
-    linkage_ : str 
+    linkage_ : str
         linkage used in the model with the best bic/aic
-    
-    reg_covar_ : float 
+
+    reg_covar_ : float
         regularization used in the model with the best bic/aic
 
     ari_ : float
@@ -469,14 +469,15 @@ def fit(self, X, y=None):
         X : array-like, shape (n_samples, n_features)
             List of n_features-dimensional data points. Each row
             corresponds to a single data point.
-        
+
         y : array-like, shape (n_samples,), optional (default=None)
             List of labels for X if available. Used to compute
             ARI scores.
 
         Returns
         -------
-        self
+        self : object
+            Returns an instance of self.
         """
 
         # Deal with number of clusters

diff --git a/graspy/cluster/base.py b/graspy/cluster/base.py
@@ -34,7 +34,7 @@ def fit(self, X, y=None):
         X : array-like, shape (n_samples, n_features)
             List of n_features-dimensional data points. Each row
             corresponds to a single data point.
-        
+
         y : array-like, shape (n_samples,), optional (default=None)
             List of labels for X if available. Used to compute
             ARI scores.

diff --git a/graspy/cluster/gclust.py b/graspy/cluster/gclust.py
@@ -45,7 +45,7 @@ class GaussianCluster(BaseCluster):
     covariance_type : {'all' (default), 'full', 'tied', 'diag', 'spherical'}, optional
         String or list/array describing the type of covariance parameters to use.
         If a string, it must be one of:
-        
+
         - 'all'
             considers all covariance structures in ['spherical', 'diag', 'tied', 'full']
         - 'full'
@@ -59,26 +59,26 @@ class GaussianCluster(BaseCluster):
         If a list/array, it must be a list/array of strings containing only
             'full', 'tied', 'diag', and/or 'spherical'.
 
-        tol : float, defaults to 1e-3.
-            The convergence threshold. EM iterations will stop when the
-            lower bound average gain is below this threshold.
+    tol : float, defaults to 1e-3.
+        The convergence threshold. EM iterations will stop when the
+        lower bound average gain is below this threshold.
 
-        reg_covar : float, defaults to 1e-6.
-            Non-negative regularization added to the diagonal of covariance.
-            Allows to assure that the covariance matrices are all positive.
+    reg_covar : float, defaults to 1e-6.
+        Non-negative regularization added to the diagonal of covariance.
+        Allows to assure that the covariance matrices are all positive.
 
-        max_iter : int, defaults to 100.
-            The number of EM iterations to perform.
+    max_iter : int, defaults to 100.
+        The number of EM iterations to perform.
 
-        n_init : int, defaults to 1.
-            The number of initializations to perform. The best results are kept.
+    n_init : int, defaults to 1.
+        The number of initializations to perform. The best results are kept.
 
-        init_params : {'kmeans', 'random'}, defaults to 'kmeans'.
-            The method used to initialize the weights, the means and the
-            precisions.
-            Must be one of::
-                'kmeans' : responsibilities are initialized using kmeans.
-                'random' : responsibilities are initialized randomly.
+    init_params : {'kmeans', 'random'}, defaults to 'kmeans'.
+        The method used to initialize the weights, the means and the
+        precisions.
+        Must be one of::
+            'kmeans' : responsibilities are initialized using kmeans.
+            'random' : responsibilities are initialized randomly.
 
     random_state : int, RandomState instance or None, optional (default=None)
         If int, ``random_state`` is the seed used by the random number generator;

diff --git a/graspy/cluster/kclust.py b/graspy/cluster/kclust.py
@@ -30,7 +30,7 @@ class KMeansCluster(BaseCluster):
     ----------
     max_clusters : int, defaults to 1.
         The maximum number of mixture components to consider.
-    
+
     random_state : int, RandomState instance or None, optional (default=None)
         If int, ``random_state`` is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
@@ -42,7 +42,7 @@ class KMeansCluster(BaseCluster):
     n_clusters_ : int
         Optimal number of components. If y is given, it is based on largest 
         ARI. Otherwise, it is based on smallest loss.
-    
+
     model_ : KMeans object
         Fitted KMeans object fitted with optimal n_components.
 
@@ -76,7 +76,7 @@ def fit(self, X, y=None):
         X : array-like, shape (n_samples, n_features)
             List of n_features-dimensional data points. Each row
             corresponds to a single data point.
-        
+
         y : array-like, shape (n_samples,), optional (default=None)
             List of labels for `X` if available. Used to compute ARI scores.
 

diff --git a/graspy/embed/ase.py b/graspy/embed/ase.py
@@ -26,11 +26,11 @@
 
 class AdjacencySpectralEmbed(BaseEmbed):
     r"""
-    Class for computing the adjacency spectral embedding of a graph 
-    
+    Class for computing the adjacency spectral embedding of a graph.
+
     The adjacency spectral embedding (ASE) is a k-dimensional Euclidean representation 
     of the graph based on its adjacency matrix. It relies on an SVD to reduce
-    the dimensionality to the specified k, or if k is unspecified, can find a number of 
+    the dimensionality to the specified k, or if k is unspecified, can find a number of
     dimensions automatically (see :class:`~graspy.embed.selectSVD`).
 
     Read more in the :ref:`tutorials <embed_tutorials>`
@@ -51,29 +51,29 @@ class AdjacencySpectralEmbed(BaseEmbed):
         SVD solver to use:
 
         - 'randomized'
-            Computes randomized svd using 
+            Computes randomized svd using
             :func:`sklearn.utils.extmath.randomized_svd`
         - 'full'
             Computes full svd using :func:`scipy.linalg.svd`
         - 'truncated'
             Computes truncated svd using :func:`scipy.sparse.linalg.svds`
 
     n_iter : int, optional (default = 5)
-        Number of iterations for randomized SVD solver. Not used by 'full' or 
-        'truncated'. The default is larger than the default in randomized_svd 
+        Number of iterations for randomized SVD solver. Not used by 'full' or
+        'truncated'. The default is larger than the default in randomized_svd
         to handle sparse matrices that may have large slowly decaying spectrum.
 
     check_lcc : bool , optional (default = True)
-        Whether to check if input graph is connected. May result in non-optimal 
+        Whether to check if input graph is connected. May result in non-optimal
         results if the graph is unconnected. If True and input is unconnected,
-        a UserWarning is thrown. Not checking for connectedness may result in 
+        a UserWarning is thrown. Not checking for connectedness may result in
         faster computation.
 
     diag_aug : bool, optional (default = True)
         Whether to replace the main diagonal of the adjacency matrix with a vector 
         corresponding to the degree (or sum of edge weights for a weighted network) 
         before embedding. Empirically, this produces latent position estimates closer
-        to the ground truth. 
+        to the ground truth.
 
 
     Attributes
@@ -84,7 +84,7 @@ class AdjacencySpectralEmbed(BaseEmbed):
         Only computed when the graph is directed, or adjacency matrix is asymmetric.
         Estimated right latent positions of the graph. Otherwise, None.
     singular_values_ : array, shape (n_components)
-        Singular values associated with the latent position matrices. 
+        Singular values associated with the latent position matrices.
 
     See Also
     --------
@@ -93,13 +93,13 @@ class AdjacencySpectralEmbed(BaseEmbed):
 
     Notes
     -----
-    The singular value decomposition: 
+    The singular value decomposition:
 
     .. math:: A = U \Sigma V^T
 
-    is used to find an orthonormal basis for a matrix, which in our case is the 
-    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are 
-    ordered according to the amount of variance they explain in the original matrix. 
+    is used to find an orthonormal basis for a matrix, which in our case is the
+    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are
+    ordered according to the amount of variance they explain in the original matrix.
     By selecting a subset of these basis vectors (through our choice of dimensionality
     reduction) we can find a lower dimensional space in which to represent the graph.
 
@@ -142,7 +142,8 @@ def fit(self, graph, y=None):
 
         Returns
         -------
-        self : returns an instance of self.
+        self : object
+            Returns an instance of self.
         """
         A = import_graph(graph)
 

diff --git a/graspy/embed/base.py b/graspy/embed/base.py
@@ -43,15 +43,15 @@ class BaseEmbed(BaseEstimator):
         - 'truncated'
             Computes truncated svd using ``scipy.sparse.linalg.svds``
         - 'randomized'
-            Computes randomized svd using 
+            Computes randomized svd using
             ``sklearn.utils.extmath.randomized_svd``
     n_iter : int, optional (default = 5)
-        Number of iterations for randomized SVD solver. Not used by 'full' or 
-        'truncated'. The default is larger than the default in randomized_svd 
+        Number of iterations for randomized SVD solver. Not used by 'full' or
+        'truncated'. The default is larger than the default in randomized_svd
         to handle sparse matrices that may have large slowly decaying spectrum.
     check_lcc : bool, optional (default = True)
-        Whether to check if input graph is connected. May result in non-optimal 
-        results if the graph is unconnected. Not checking for connectedness may 
+        Whether to check if input graph is connected. May result in non-optimal
+        results if the graph is unconnected. Not checking for connectedness may
         result in faster computation.
 
     Attributes
@@ -154,13 +154,12 @@ def fit_transform(self, graph, y=None):
         Parameters
         ----------
         graph : np.ndarray or networkx.Graph
-
-        y : Ignored
+            Input graph to embed.
 
         Returns
         -------
         out : np.ndarray, shape (n_vertices, n_dimension) OR tuple (len 2)
-            where both elements have shape (n_vertices, n_dimension)
+            Where both elements have shape (n_vertices, n_dimension)
             A single np.ndarray represents the latent position of an undirected
             graph, whereas a tuple represents the left and right latent positions
             for a directed graph
@@ -199,8 +198,6 @@ def _check_input_graphs(self, graphs):
             If list of ndarray, each array must have shape (n_vertices, n_vertices).
             If ndarray, then array must have shape (n_graphs, n_vertices, n_vertices).
 
-        y : Ignored
-
         Returns
         -------
         out : ndarray, shape (n_graphs, n_vertices, n_vertices)