Skip to content

Commit

Permalink
Re-black for release.
Browse files Browse the repository at this point in the history
  • Loading branch information
lmcinnes committed Jan 11, 2021
1 parent b6d038d commit e68a0c1
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
9 changes: 6 additions & 3 deletions umap/parametric_umap.py
Expand Up @@ -376,7 +376,11 @@ def _fit_embed_data(self, X, n_epochs, init, random_state):

def __getstate__(self):
# this function supports pickling, making sure that objects can be pickled
return dict((k, v) for (k, v) in self.__dict__.items() if should_pickle(k, v) and k != "optimizer")
return dict(
(k, v)
for (k, v) in self.__dict__.items()
if should_pickle(k, v) and k != "optimizer"
)

def save(self, save_location, verbose=True):

Expand Down Expand Up @@ -907,11 +911,10 @@ def load_ParametricUMAP(save_location, verbose=True):
print("Pickle of ParametricUMAP model loaded from {}".format(model_output))

# Work around optimizer not pickling anymore (since tf 2.4)
class_name = model._optimizer_dict['name']
class_name = model._optimizer_dict["name"]
OptimizerClass = getattr(tf.keras.optimizers, class_name)
model.optimizer = OptimizerClass.from_config(model._optimizer_dict)


# load encoder
encoder_output = os.path.join(save_location, "encoder")
if os.path.exists(encoder_output):
Expand Down
5 changes: 4 additions & 1 deletion umap/tests/test_umap_ops.py
Expand Up @@ -101,6 +101,7 @@ def test_multi_component_layout_precomputed():

    assert_less(error, 15.0, msg="Multi component embedding too far astray")


@pytest.mark.parametrize("num_isolates", [1, 5])
@pytest.mark.parametrize("metric", ["jaccard", "hellinger", "cosine"])
@pytest.mark.parametrize("force_approximation", [True, False])
Expand Down Expand Up @@ -137,7 +138,8 @@ def test_disconnected_data(num_isolates, metric, force_approximation):
isolated_vertices = disconnected_vertices(model)
assert isolated_vertices[10] == True
number_of_nan = np.sum(np.isnan(model.embedding_[isolated_vertices]))
assert number_of_nan >= num_isolates*model.n_components
assert number_of_nan >= num_isolates * model.n_components


@pytest.mark.parametrize("num_isolates", [1])
@pytest.mark.parametrize("sparse", [True, False])
Expand Down Expand Up @@ -166,6 +168,7 @@ def test_disconnected_data_precomputed(num_isolates, sparse):
number_of_nan = np.sum(np.isnan(model.embedding_[isolated_vertices]))
assert number_of_nan >= num_isolates * model.n_components


# ---------------
# Umap Transform
# --------------
Expand Down
18 changes: 12 additions & 6 deletions umap/umap_.py
Expand Up @@ -415,7 +415,7 @@ def compute_membership_strengths(
continue # We didn't get the full knn for i
# If applied to an adjacency matrix points shouldn't be similar to themselves.
# If applied to an incidence matrix (or bipartite) then the row and column indices are different.
if (bipartite==False) & (knn_indices[i, j] == i):
if (bipartite == False) & (knn_indices[i, j] == i):
val = 0.0
elif knn_dists[i, j] - rhos[i] <= 0.0 or sigmas[i] == 0.0:
val = 1.0
Expand Down Expand Up @@ -1258,6 +1258,7 @@ def init_transform(indices, weights, embedding):

return result


def init_graph_transform(graph, embedding):
"""Given a bipartite graph representing the 1-simplices and strengths between the
new points and the original data set along with an embedding of the original points
Expand Down Expand Up @@ -1295,10 +1296,15 @@ def init_graph_transform(graph, embedding):
result[row_index, :] = embedding[col_index, :]
break
for d in range(embedding.shape[1]):
result[row_index, d] += graph[row_index, col_index] / num_neighbours * embedding[col_index, d]
result[row_index, d] += (
graph[row_index, col_index]
/ num_neighbours
* embedding[col_index, d]
)

return result


@numba.njit()
def init_update(current_init, n_original_samples, indices):
for i in range(n_original_samples, indices.shape[0]):
Expand Down Expand Up @@ -2737,10 +2743,10 @@ def transform(self, X):
# This was a very specially constructed graph with constant degree.
# That lets us do fancy unpacking by reshaping the csr matrix indices
# and data. Doing so relies on the constant degree assumption!
#csr_graph = normalize(graph.tocsr(), norm="l1")
#inds = csr_graph.indices.reshape(X.shape[0], self._n_neighbors)
#weights = csr_graph.data.reshape(X.shape[0], self._n_neighbors)
#embedding = init_transform(inds, weights, self.embedding_)
# csr_graph = normalize(graph.tocsr(), norm="l1")
# inds = csr_graph.indices.reshape(X.shape[0], self._n_neighbors)
# weights = csr_graph.data.reshape(X.shape[0], self._n_neighbors)
# embedding = init_transform(inds, weights, self.embedding_)
# This is less fast code than the above numba.jit'd code.
# It handles the fact that our nearest neighbour graph can now contain variable numbers of vertices.
csr_graph = graph.tocsr()
Expand Down

0 comments on commit e68a0c1

Please sign in to comment.