Skip to content

Commit

Permalink
Re-black for release.
Browse files Browse the repository at this point in the history
  • Loading branch information
lmcinnes committed Jan 11, 2021
1 parent b6d038d commit e68a0c1
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
9 changes: 6 additions & 3 deletions umap/parametric_umap.py
Expand Up @@ -376,7 +376,11 @@ def _fit_embed_data(self, X, n_epochs, init, random_state):

def __getstate__(self):
# this function supports pickling, making sure that objects can be pickled
return dict((k, v) for (k, v) in self.__dict__.items() if should_pickle(k, v) and k != "optimizer")
return dict(
(k, v)
for (k, v) in self.__dict__.items()
if should_pickle(k, v) and k != "optimizer"
)

def save(self, save_location, verbose=True):

Expand Down Expand Up @@ -907,11 +911,10 @@ def load_ParametricUMAP(save_location, verbose=True):
print("Pickle of ParametricUMAP model loaded from {}".format(model_output))

# Work around optimizer not pickling anymore (since tf 2.4)
class_name = model._optimizer_dict['name']
class_name = model._optimizer_dict["name"]
OptimizerClass = getattr(tf.keras.optimizers, class_name)
model.optimizer = OptimizerClass.from_config(model._optimizer_dict)


# load encoder
encoder_output = os.path.join(save_location, "encoder")
if os.path.exists(encoder_output):
Expand Down
5 changes: 4 additions & 1 deletion umap/tests/test_umap_ops.py
Expand Up @@ -101,6 +101,7 @@ def test_multi_component_layout_precomputed():

    assert_less(error, 15.0, msg="Multi component embedding too far astray")


@pytest.mark.parametrize("num_isolates", [1, 5])
@pytest.mark.parametrize("metric", ["jaccard", "hellinger", "cosine"])
@pytest.mark.parametrize("force_approximation", [True, False])
Expand Down Expand Up @@ -137,7 +138,8 @@ def test_disconnected_data(num_isolates, metric, force_approximation):
isolated_vertices = disconnected_vertices(model)
assert isolated_vertices[10] == True
number_of_nan = np.sum(np.isnan(model.embedding_[isolated_vertices]))
assert number_of_nan >= num_isolates*model.n_components
assert number_of_nan >= num_isolates * model.n_components


@pytest.mark.parametrize("num_isolates", [1])
@pytest.mark.parametrize("sparse", [True, False])
Expand Down Expand Up @@ -166,6 +168,7 @@ def test_disconnected_data_precomputed(num_isolates, sparse):
number_of_nan = np.sum(np.isnan(model.embedding_[isolated_vertices]))
assert number_of_nan >= num_isolates * model.n_components


# ---------------
# Umap Transform
# --------------
Expand Down
18 changes: 12 additions & 6 deletions umap/umap_.py
Expand Up @@ -415,7 +415,7 @@ def compute_membership_strengths(
continue # We didn't get the full knn for i
# If applied to an adjacency matrix points shouldn't be similar to themselves.
# If applied to an incidence matrix (or bipartite) then the row and column indices are different.
if (bipartite==False) & (knn_indices[i, j] == i):
if (bipartite == False) & (knn_indices[i, j] == i):
val = 0.0
elif knn_dists[i, j] - rhos[i] <= 0.0 or sigmas[i] == 0.0:
val = 1.0
Expand Down Expand Up @@ -1258,6 +1258,7 @@ def init_transform(indices, weights, embedding):

return result


def init_graph_transform(graph, embedding):
"""Given a bipartite graph representing the 1-simplices and strengths between the
new points and the original data set along with an embedding of the original points
Expand Down Expand Up @@ -1295,10 +1296,15 @@ def init_graph_transform(graph, embedding):
result[row_index, :] = embedding[col_index, :]
break
for d in range(embedding.shape[1]):
result[row_index, d] += graph[row_index, col_index] / num_neighbours * embedding[col_index, d]
result[row_index, d] += (
graph[row_index, col_index]
/ num_neighbours
* embedding[col_index, d]
)

return result


@numba.njit()
def init_update(current_init, n_original_samples, indices):
for i in range(n_original_samples, indices.shape[0]):
Expand Down Expand Up @@ -2737,10 +2743,10 @@ def transform(self, X):
# This was a very specially constructed graph with constant degree.
# That lets us do fancy unpacking by reshaping the csr matrix indices
# and data. Doing so relies on the constant degree assumption!
#csr_graph = normalize(graph.tocsr(), norm="l1")
#inds = csr_graph.indices.reshape(X.shape[0], self._n_neighbors)
#weights = csr_graph.data.reshape(X.shape[0], self._n_neighbors)
#embedding = init_transform(inds, weights, self.embedding_)
# csr_graph = normalize(graph.tocsr(), norm="l1")
# inds = csr_graph.indices.reshape(X.shape[0], self._n_neighbors)
# weights = csr_graph.data.reshape(X.shape[0], self._n_neighbors)
# embedding = init_transform(inds, weights, self.embedding_)
# This is less fast code than the above numba.jit'd code.
# It handles the fact that our nearest neighbour graph can now contain variable numbers of vertices.
csr_graph = graph.tocsr()
Expand Down

0 comments on commit e68a0c1

Please sign in to comment.