
Commit

Merge pull request #72 from dpeerlab/master
Merging into v1.0.1
ManuSetty committed May 28, 2022
2 parents 136a102 + 3679b9e commit eab3ba7
Showing 4 changed files with 57 additions and 7 deletions.
44 changes: 43 additions & 1 deletion README.md
@@ -62,6 +62,48 @@ Each object has the following elements
Notebooks detailing the generation of results comparing Palantir to trajectory detection algorithms are available [here](https://github.com/dpeerlab/Palantir/blob/master/notebooks/comparisons)


#### Convert to Seurat objects
Use the snippet below to convert `anndata` objects to `Seurat` objects:
```R
library("SeuratDisk")
library("Seurat")
library("reticulate")
use_condaenv(<conda env>, required = TRUE) # install "anndata" into <conda env> first
anndata <- import("anndata")

# Download the AnnData files
url_Rep1 <- "https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep1.h5ad"
curl::curl_download(url_Rep1, basename(url_Rep1))
url_Rep2 <- "https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep2.h5ad"
curl::curl_download(url_Rep2, basename(url_Rep2))
url_Rep3 <- "https://s3.amazonaws.com/dp-lab-data-public/palantir/human_cd34_bm_rep3.h5ad"
curl::curl_download(url_Rep3, basename(url_Rep3))

# The H5AD files are compressed with the LZF filter, which is Python-specific
# and cannot easily be used in R. To use them with Seurat and SeuratDisk,
# read them in Python and re-save them with gzip compression:
# https://github.com/mojaveazure/seurat-disk/issues/7
adata_Rep1 <- anndata$read("human_cd34_bm_rep1.h5ad")
adata_Rep2 <- anndata$read("human_cd34_bm_rep2.h5ad")
adata_Rep3 <- anndata$read("human_cd34_bm_rep3.h5ad")
adata_Rep1$write_h5ad("human_cd34_bm_rep1.gzip.h5ad", compression = "gzip")
adata_Rep2$write_h5ad("human_cd34_bm_rep2.gzip.h5ad", compression = "gzip")
adata_Rep3$write_h5ad("human_cd34_bm_rep3.gzip.h5ad", compression = "gzip")

# Convert the gzip-compressed h5ad files to Seurat objects
Convert("human_cd34_bm_rep1.gzip.h5ad", dest = "h5seurat", overwrite = TRUE)
Convert("human_cd34_bm_rep2.gzip.h5ad", dest = "h5seurat", overwrite = TRUE)
Convert("human_cd34_bm_rep3.gzip.h5ad", dest = "h5seurat", overwrite = TRUE)
human_cd34_bm_Rep1 <- LoadH5Seurat("human_cd34_bm_rep1.gzip.h5seurat")
human_cd34_bm_Rep2 <- LoadH5Seurat("human_cd34_bm_rep2.gzip.h5seurat")
human_cd34_bm_Rep3 <- LoadH5Seurat("human_cd34_bm_rep3.gzip.h5seurat")
```
Thanks to Anne Ludwig from University Hospital Heidelberg for the tip!
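The re-compression step can also be done directly in Python, without reticulate. A minimal sketch, assuming the `anndata` package is installed and the downloaded files sit in the working directory (the guard skips the work when either is missing):

```python
import importlib.util
import os

# File names as used in the README snippet above
files = [f"human_cd34_bm_rep{i}.h5ad" for i in (1, 2, 3)]

# Re-save LZF-compressed .h5ad files with gzip compression so that
# SeuratDisk can read them (LZF is a Python-specific HDF5 filter)
if importlib.util.find_spec("anndata") and all(os.path.exists(f) for f in files):
    import anndata
    for f in files:
        ad = anndata.read_h5ad(f)
        ad.write_h5ad(f.replace(".h5ad", ".gzip.h5ad"), compression="gzip")
else:
    print("anndata or input files missing; nothing re-saved")
```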


#### Citations
The Palantir manuscript is available from [Nature Biotechnology](https://www.nature.com/articles/s41587-019-0068-4). If you use Palantir in your work, please cite our paper.

@@ -91,4 +133,4 @@ Release Notes

### Version 0.2.5

* A fix related to [issue#28](https://github.com/dpeerlab/Palantir/issues/28). When identifying terminal states, duplicate values were generated instead of unique ones.
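The fix amounts to order-preserving de-duplication of the detected state labels. An illustrative sketch of that idea (not Palantir's actual code; `unique_states` is a hypothetical helper):

```python
def unique_states(states):
    # Keep only the first occurrence of each state, preserving order,
    # so repeated terminal-state labels collapse to unique ones
    seen = set()
    out = []
    for s in states:
        if s not in seen:
            seen.add(s)
            out.append(s)
    return out

print(unique_states(["HSC", "Ery", "HSC", "Mono", "Ery"]))  # → ['HSC', 'Ery', 'Mono']
```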
1 change: 0 additions & 1 deletion setup.py
@@ -36,7 +36,6 @@
"tables>=3.4.2",
"Cython",
"cmake",
"MulticoreTSNE",
"matplotlib>=2.2.2",
"seaborn>=0.8.1",
"tzlocal",
2 changes: 1 addition & 1 deletion src/palantir/preprocess.py
@@ -40,6 +40,6 @@ def log_transform(data, pseudo_count=0.1):
:return: Log transformed matrix
"""
if type(data) is sc.AnnData:
data.X.data = np.log2(data.X.data + pseudo_count) - np.log2(pseudo_count)
data.X = np.log2(data.X + pseudo_count) - np.log2(pseudo_count)
else:
return np.log2(data + pseudo_count)
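The pseudo-count form above has a useful property: subtracting `log2(pseudo_count)` maps zero entries back to exactly zero, so sparsity is preserved. A pure-Python sketch of the same transform on a dense list of values:

```python
import math

def log_transform(values, pseudo_count=0.1):
    # log2(x + pc) - log2(pc): zero inputs map to exactly zero
    return [math.log2(v + pseudo_count) - math.log2(pseudo_count) for v in values]

# 0 stays 0; a value equal to the pseudo-count maps to 1
print(log_transform([0.0, 0.1, 1.0]))
```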
17 changes: 13 additions & 4 deletions src/palantir/utils.py
@@ -1,6 +1,7 @@
import pandas as pd
import numpy as np
from MulticoreTSNE import MulticoreTSNE as TSNE
from sklearn.manifold import TSNE

import phenograph

from scipy.sparse import csr_matrix, find, issparse
@@ -165,9 +166,17 @@ def run_tsne(data, n_dim=2, perplexity=150, **kwargs):
:param n_dim: Number of dimensions for tSNE embedding
:return: tSNE embedding of the data
"""
tsne = TSNE(n_components=n_dim, perplexity=perplexity, **kwargs).fit_transform(
data.values
)
try:
from MulticoreTSNE import MulticoreTSNE as TSNE

print("Using the 'MulticoreTSNE' package by Ulyanov (2017)")
tsne = TSNE(n_components=n_dim, perplexity=perplexity, **kwargs).fit_transform(data.values)
except ImportError:
from sklearn.manifold import TSNE

print("Could not import 'MulticoreTSNE'. Install for faster runtime. Falling back to scikit-learn.")
        tsne = TSNE(n_components=n_dim, perplexity=perplexity, **kwargs).fit_transform(data.values)

tsne = pd.DataFrame(tsne, index=data.index)
tsne.columns = ["x", "y"]
return tsne
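The try/except import above is a common optional-dependency pattern: prefer a faster third-party implementation when present, otherwise fall back to a standard one with the same interface. A minimal sketch of the same pattern, using `ujson` as the example fast backend:

```python
try:
    import ujson as json  # faster third-party drop-in, if installed
except ImportError:
    import json  # stdlib fallback with the same dumps/loads interface

# Caller code is identical regardless of which backend was imported
print(json.dumps({"x": 1}))
```

Keeping both branches behind the same name (`json`, or `TSNE` in the diff above) is what lets the rest of the function stay backend-agnostic.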
