Merge pull request #76 from dpeerlab/v1.1.0

V1.1.0
dpeerlab · Jun 15, 2022 · faf2f98 · faf2f98
2 parents 19f3068 + 74c3282
commit faf2f98
Show file tree

Hide file tree

Showing 7 changed files with 441 additions and 295 deletions.
diff --git a/README.md b/README.md
@@ -17,20 +17,8 @@ Palantir is an algorithm to align cells along differentiation trajectories. Pala
 3. To uninstall:
 
 		$> pip uninstall palantir
-
-4. If you would like to determine gene expression trends, please install <a href="https://cran.r-project.org"> R <a> programming language and the R package <a href="https://cran.r-project.org/web/packages/gam/">GAM </a>. You will also need to install the rpy2 module using 
-
-		$> pip install .['PLOT_GENE_TRENDS']
-		    OR,
-		$> pip install rpy2
-
-    In case of compiler error during installation of `rpy2`, try to link your compiler in `env`. Example:
-
-        $> env CC=/usr/local/Cellar/gcc/xxx/bin/gcc-x pip install .['PLOT_GENE_TRENDS']
-
-    where `x` should be replaced with the version numbers
 		
-5. Palantir can also be used with [**Scanpy**](https://github.com/theislab/scanpy). It is fully integrated into Scanpy, and can be found under Scanpy's external modules ([link](https://scanpy.readthedocs.io/en/latest/api/scanpy.external.html#external-api))
+4. Palantir can also be used with [**Scanpy**](https://github.com/theislab/scanpy). It is fully integrated into Scanpy, and can be found under Scanpy's external modules ([link](https://scanpy.readthedocs.io/en/latest/api/scanpy.external.html#external-api))
 
 
 #### Usage
@@ -120,6 +108,10 @@ ____
 
 Release Notes
 -------------
+### Version 1.1.0
+ * Replaced rpy2 with pyGAM for computing gene expression trends. 
+ * Updated the tutorial and plotting functions.
+
 
 ### Version 1.0.0
 

diff --git a/data/marrow_sample_scseq_counts.h5ad b/data/marrow_sample_scseq_counts.h5ad
diff --git a/notebooks/Palantir_sample_notebook.ipynb b/notebooks/Palantir_sample_notebook.ipynb
diff --git a/setup.py b/setup.py
@@ -40,10 +40,8 @@
         "seaborn>=0.8.1",
         "tzlocal",
         "scanpy>=1.6.0",
+        "pygam"
     ],
-    extras_require={
-        'PLOT_GENE_TRENDS': ["rpy2>=3.0.2"]
-    },
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",

diff --git a/src/palantir/plot.py b/src/palantir/plot.py
@@ -162,11 +162,13 @@ def cell_types(tsne, clusters, cluster_colors=None, n_cols=5):
         ax.set_title(cluster, fontsize=10)
 
 
-def plot_cell_clusters(tsne, clusters):
+def plot_cell_clusters(plot_embedding, clusters):
     """Plot cell clusters on the tSNE map
-    :param tsne: tSNE map
+    :param plot_embedding: tSNE map
     :param clusters: Results of the determine_cell_clusters function
     """
+    tsne = plot_embedding.copy()
+    tsne.columns = ['x', 'y']
 
     # Cluster colors
     n_clusters = len(set(clusters))
@@ -215,10 +217,12 @@ def plot_tsne(tsne, fig=None, ax=None):
     return fig, ax
 
 
-def highlight_cells_on_tsne(tsne, cells, fig=None, ax=None):
+def highlight_cells_on_tsne(plot_tsne, cells, fig=None, ax=None):
     """    Function to highlight specific cells on the tSNE map
     """
     fig, ax = get_fig(fig=fig, ax=ax)
+    tsne = plot_tsne.copy()
+    tsne.columns = ['x', 'y']
     ax.scatter(tsne["x"], tsne["y"], s=5, color="lightgrey")
     ax.scatter(tsne.loc[cells, "x"], tsne.loc[cells, "y"], s=30)
     ax.set_axis_off()
@@ -314,8 +318,8 @@ def plot_diffusion_components(tsne, dm_res):
 
     for i, ax in enumerate(fig):
         ax.scatter(
-            tsne["x"],
-            tsne["y"],
+            tsne.iloc[:, 0],
+            tsne.iloc[:, 1],
             c=dm_res["EigenVectors"].loc[tsne.index, i],
             cmap=matplotlib.cm.Spectral_r,
             edgecolors="none",
@@ -328,7 +332,7 @@ def plot_diffusion_components(tsne, dm_res):
         ax.set_axis_off()
 
 
-def plot_palantir_results(pr_res, tsne):
+def plot_palantir_results(pr_res, tsne, s=3):
     """ Plot Palantir results on tSNE
     """
 
@@ -344,7 +348,7 @@ def plot_palantir_results(pr_res, tsne):
     # Pseudotime
     ax = plt.subplot(gs[0:2, 1:3])
     c = pr_res.pseudotime[tsne.index]
-    ax.scatter(tsne.loc[:, "x"], tsne.loc[:, "y"], s=3, cmap=matplotlib.cm.plasma, c=c)
+    ax.scatter(tsne.iloc[:, 0], tsne.iloc[:, 1], s=s, cmap=matplotlib.cm.plasma, c=c)
     normalize = matplotlib.colors.Normalize(vmin=np.min(c), vmax=np.max(c))
     cax, _ = matplotlib.colorbar.make_axes(ax)
     cbar = matplotlib.colorbar.ColorbarBase(cax, norm=normalize, cmap=cmap)
@@ -354,7 +358,7 @@ def plot_palantir_results(pr_res, tsne):
     # Entropy
     ax = plt.subplot(gs[0:2, 3:5])
     c = pr_res.entropy[tsne.index]
-    ax.scatter(tsne.loc[:, "x"], tsne.loc[:, "y"], s=3, cmap=matplotlib.cm.plasma, c=c)
+    ax.scatter(tsne.iloc[:, 0], tsne.iloc[:, 1], s=s, cmap=matplotlib.cm.plasma, c=c)
     normalize = matplotlib.colors.Normalize(vmin=np.min(c), vmax=np.max(c))
     cax, _ = matplotlib.colorbar.make_axes(ax)
     cbar = matplotlib.colorbar.ColorbarBase(cax, norm=normalize, cmap=cmap)
@@ -366,7 +370,7 @@ def plot_palantir_results(pr_res, tsne):
         ax = plt.subplot(gs[row + 2, np.remainder(i, n_cols)])
         c = pr_res.branch_probs.loc[tsne.index, branch]
         ax.scatter(
-            tsne.loc[:, "x"], tsne.loc[:, "y"], s=3, cmap=matplotlib.cm.plasma, c=c
+            tsne.iloc[:, 0], tsne.iloc[:, 1], s=s, cmap=matplotlib.cm.plasma, c=c
         )
         normalize = matplotlib.colors.Normalize(vmin=np.min(c), vmax=np.max(c))
         cax, _ = matplotlib.colorbar.make_axes(ax)

diff --git a/src/palantir/presults.py b/src/palantir/presults.py
@@ -8,6 +8,7 @@
 from collections import OrderedDict
 from joblib import delayed, Parallel
 from sklearn.preprocessing import StandardScaler
+from pygam import LinearGAM, s
 
 
 class PResults(object):
@@ -78,31 +79,6 @@ def compute_gene_trends(pr_res, gene_exprs, lineages=None, n_jobs=-1):
     :return: Dictionary of gene expression trends and standard deviations for each branch
     """
 
-    # Error check
-    try:
-        import rpy2
-        import rpy2.rinterface_lib.embedded as embedded
-        from rpy2.robjects.packages import importr
-    except ImportError:
-        raise RuntimeError(
-            'Cannot compute gene expression trends without installing rpy2. \
-            \nPlease use "pip3 install rpy2" to install rpy2'
-        )
-
-    if not shutil.which("R"):
-        raise RuntimeError(
-            "R installation is necessary for computing gene expression trends. \
-            \nPlease install R and try again"
-        )
-
-    try:
-        rgam = importr("gam")
-    except embedded.RRuntimeError:
-        raise RuntimeError(
-            'R package "gam" is necessary for computing gene expression trends. \
-            \nPlease install gam from https://cran.r-project.org/web/packages/gam/ and try again'
-        )
-
     # Compute for all lineages if branch is not specified
     if lineages is None:
         lineages = pr_res.branch_probs.columns
@@ -146,12 +122,44 @@ def compute_gene_trends(pr_res, gene_exprs, lineages=None, n_jobs=-1):
             results[branch]["trends"].loc[gene, :] = res[i][0]
             results[branch]["std"].loc[gene, :] = res[i][1]
         end = time.time()
-        print("Time for processing {}: {} minutes".format(branch, (end - start) / 60))
+        print("Time for processing {}: {} minutes".format(
+            branch, (end - start) / 60))
 
     return results
 
 
 def _gam_fit_predict(x, y, weights=None, pred_x=None):
+    # Fit a weighted LinearGAM of y on x and return (predictions, stds); weights default to 1.0 per observation
+    if weights is None:
+        weights = np.repeat(1.0, len(x))
+
+    # Indices of observations with positive weight; only these are passed to the fit
+    use_inds = np.where(weights > 0)[0]
+
+    # Fit a single spline term on feature 0 (4 splines, spline order 2) using the positively weighted points
+    gam = LinearGAM(s(0, n_splines=4, spline_order=2)).fit(x[use_inds], y[use_inds],
+                                                           weights=weights[use_inds])
+
+    # Predict at pred_x, falling back to the full input x when pred_x is not given
+    if pred_x is None:
+        pred_x = x
+    y_pred = gam.predict(pred_x)
+
+    # Standard deviations: sigma uses residuals at the fitted points (n - 2 denominator); the x mean and spread use all of x
+    p = gam.predict(x[use_inds])
+    n = len(use_inds)
+    sigma = np.sqrt(((y[use_inds] - p) ** 2).sum() / (n - 2))
+    stds = (
+        np.sqrt(1 + 1 / n + (pred_x - np.mean(x)) **
+                2 / ((x - np.mean(x)) ** 2).sum())
+        * sigma
+        / 2
+    )
+
+    return y_pred, stds
+
+
+def _gam_fit_predict_rpy2(x, y, weights=None, pred_x=None):
 
     import rpy2.robjects as robjects
     from rpy2.robjects import pandas2ri, Formula
@@ -171,27 +179,31 @@ def _gam_fit_predict(x, y, weights=None, pred_x=None):
 
     # Fit the model
     rgam = importr("gam")
-    model = rgam.gam(Formula("y~s(x)"), data=r_df, weights=pd.Series(weights[use_inds]))
+    model = rgam.gam(Formula("y~s(x)"), data=r_df,
+                     weights=pd.Series(weights[use_inds]))
 
     # Predictions
     if pred_x is None:
         pred_x = x
     y_pred = np.array(
         robjects.r.predict(
-            model, newdata=pandas2ri.py2rpy(pd.DataFrame(pred_x, columns=["x"]))
+            model, newdata=pandas2ri.py2rpy(
+                pd.DataFrame(pred_x, columns=["x"]))
         )
     )
 
     # Standard deviations
     p = np.array(
         robjects.r.predict(
-            model, newdata=pandas2ri.py2rpy(pd.DataFrame(x[use_inds], columns=["x"]))
+            model, newdata=pandas2ri.py2rpy(
+                pd.DataFrame(x[use_inds], columns=["x"]))
         )
     )
     n = len(use_inds)
     sigma = np.sqrt(((y[use_inds] - p) ** 2).sum() / (n - 2))
     stds = (
-        np.sqrt(1 + 1 / n + (pred_x - np.mean(x)) ** 2 / ((x - np.mean(x)) ** 2).sum())
+        np.sqrt(1 + 1 / n + (pred_x - np.mean(x)) **
+                2 / ((x - np.mean(x)) ** 2).sum())
         * sigma
         / 2
     )

diff --git a/src/palantir/version.py b/src/palantir/version.py
@@ -1,3 +1,3 @@
-__version__ = "1.0.1"
+__version__ = "1.1"
 __author__ = "Manu Setty"
 __author_email__ = "manu.talanki@gmail.com"