Skip to content

Commit

Permalink
Merge pull request #76 from dpeerlab/v1.1.0
Browse files Browse the repository at this point in the history
V1.1.0
  • Loading branch information
ManuSetty committed Jun 15, 2022
2 parents 19f3068 + 74c3282 commit faf2f98
Show file tree
Hide file tree
Showing 7 changed files with 441 additions and 295 deletions.
18 changes: 5 additions & 13 deletions README.md
Expand Up @@ -17,20 +17,8 @@ Palantir is an algorithm to align cells along differentiation trajectories. Pala
3. To uninstall:

$> pip uninstall palantir

4. If you would like to determine gene expression trends, please install the <a href="https://cran.r-project.org">R</a> programming language and the R package <a href="https://cran.r-project.org/web/packages/gam/">GAM</a>. You will also need to install the rpy2 module using

$> pip install .['PLOT_GENE_TRENDS']
OR,
$> pip install rpy2

In case of compiler error during installation of `rpy2`, try to link your compiler in `env`. Example:

$> env CC=/usr/local/Cellar/gcc/xxx/bin/gcc-x pip install .['PLOT_GENE_TRENDS']

where `x` should be replaced with the version numbers
5. Palantir can also be used with [**Scanpy**](https://github.com/theislab/scanpy). It is fully integrated into Scanpy, and can be found under Scanpy's external modules ([link](https://scanpy.readthedocs.io/en/latest/api/scanpy.external.html#external-api))
4. Palantir can also be used with [**Scanpy**](https://github.com/theislab/scanpy). It is fully integrated into Scanpy, and can be found under Scanpy's external modules ([link](https://scanpy.readthedocs.io/en/latest/api/scanpy.external.html#external-api))


#### Usage
Expand Down Expand Up @@ -120,6 +108,10 @@ ____

Release Notes
-------------
### Version 1.1.0
* Replaced rpy2 with pyGAM for computing gene expression trends.
* Updated tutorial and plotting functions


### Version 1.0.0

Expand Down
Binary file added data/marrow_sample_scseq_counts.h5ad
Binary file not shown.
618 changes: 379 additions & 239 deletions notebooks/Palantir_sample_notebook.ipynb

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions setup.py
Expand Up @@ -40,10 +40,8 @@
"seaborn>=0.8.1",
"tzlocal",
"scanpy>=1.6.0",
"pygam"
],
extras_require={
'PLOT_GENE_TRENDS': ["rpy2>=3.0.2"]
},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
Expand Down
22 changes: 13 additions & 9 deletions src/palantir/plot.py
Expand Up @@ -162,11 +162,13 @@ def cell_types(tsne, clusters, cluster_colors=None, n_cols=5):
ax.set_title(cluster, fontsize=10)


def plot_cell_clusters(tsne, clusters):
def plot_cell_clusters(plot_embedding, clusters):
"""Plot cell clusters on the tSNE map
:param tsne: tSNE map
:param plot_embedding: tSNE map
:param clusters: Results of the determine_cell_clusters function
"""
tsne = plot_embedding.copy()
tsne.columns = ['x', 'y']

# Cluster colors
n_clusters = len(set(clusters))
Expand Down Expand Up @@ -215,10 +217,12 @@ def plot_tsne(tsne, fig=None, ax=None):
return fig, ax


def highlight_cells_on_tsne(tsne, cells, fig=None, ax=None):
def highlight_cells_on_tsne(plot_tsne, cells, fig=None, ax=None):
""" Function to highlight specific cells on the tSNE map
"""
fig, ax = get_fig(fig=fig, ax=ax)
tsne = plot_tsne.copy()
tsne.columns = ['x', 'y']
ax.scatter(tsne["x"], tsne["y"], s=5, color="lightgrey")
ax.scatter(tsne.loc[cells, "x"], tsne.loc[cells, "y"], s=30)
ax.set_axis_off()
Expand Down Expand Up @@ -314,8 +318,8 @@ def plot_diffusion_components(tsne, dm_res):

for i, ax in enumerate(fig):
ax.scatter(
tsne["x"],
tsne["y"],
tsne.iloc[:, 0],
tsne.iloc[:, 1],
c=dm_res["EigenVectors"].loc[tsne.index, i],
cmap=matplotlib.cm.Spectral_r,
edgecolors="none",
Expand All @@ -328,7 +332,7 @@ def plot_diffusion_components(tsne, dm_res):
ax.set_axis_off()


def plot_palantir_results(pr_res, tsne):
def plot_palantir_results(pr_res, tsne, s=3):
""" Plot Palantir results on tSNE
"""

Expand All @@ -344,7 +348,7 @@ def plot_palantir_results(pr_res, tsne):
# Pseudotime
ax = plt.subplot(gs[0:2, 1:3])
c = pr_res.pseudotime[tsne.index]
ax.scatter(tsne.loc[:, "x"], tsne.loc[:, "y"], s=3, cmap=matplotlib.cm.plasma, c=c)
ax.scatter(tsne.iloc[:, 0], tsne.iloc[:, 1], s=s, cmap=matplotlib.cm.plasma, c=c)
normalize = matplotlib.colors.Normalize(vmin=np.min(c), vmax=np.max(c))
cax, _ = matplotlib.colorbar.make_axes(ax)
cbar = matplotlib.colorbar.ColorbarBase(cax, norm=normalize, cmap=cmap)
Expand All @@ -354,7 +358,7 @@ def plot_palantir_results(pr_res, tsne):
# Entropy
ax = plt.subplot(gs[0:2, 3:5])
c = pr_res.entropy[tsne.index]
ax.scatter(tsne.loc[:, "x"], tsne.loc[:, "y"], s=3, cmap=matplotlib.cm.plasma, c=c)
ax.scatter(tsne.iloc[:, 0], tsne.iloc[:, 1], s=s, cmap=matplotlib.cm.plasma, c=c)
normalize = matplotlib.colors.Normalize(vmin=np.min(c), vmax=np.max(c))
cax, _ = matplotlib.colorbar.make_axes(ax)
cbar = matplotlib.colorbar.ColorbarBase(cax, norm=normalize, cmap=cmap)
Expand All @@ -366,7 +370,7 @@ def plot_palantir_results(pr_res, tsne):
ax = plt.subplot(gs[row + 2, np.remainder(i, n_cols)])
c = pr_res.branch_probs.loc[tsne.index, branch]
ax.scatter(
tsne.loc[:, "x"], tsne.loc[:, "y"], s=3, cmap=matplotlib.cm.plasma, c=c
tsne.iloc[:, 0], tsne.iloc[:, 1], s=s, cmap=matplotlib.cm.plasma, c=c
)
normalize = matplotlib.colors.Normalize(vmin=np.min(c), vmax=np.max(c))
cax, _ = matplotlib.colorbar.make_axes(ax)
Expand Down
72 changes: 42 additions & 30 deletions src/palantir/presults.py
Expand Up @@ -8,6 +8,7 @@
from collections import OrderedDict
from joblib import delayed, Parallel
from sklearn.preprocessing import StandardScaler
from pygam import LinearGAM, s


class PResults(object):
Expand Down Expand Up @@ -78,31 +79,6 @@ def compute_gene_trends(pr_res, gene_exprs, lineages=None, n_jobs=-1):
:return: Dictionary of gene expression trends and standard deviations for each branch
"""

# Error check
try:
import rpy2
import rpy2.rinterface_lib.embedded as embedded
from rpy2.robjects.packages import importr
except ImportError:
raise RuntimeError(
'Cannot compute gene expression trends without installing rpy2. \
\nPlease use "pip3 install rpy2" to install rpy2'
)

if not shutil.which("R"):
raise RuntimeError(
"R installation is necessary for computing gene expression trends. \
\nPlease install R and try again"
)

try:
rgam = importr("gam")
except embedded.RRuntimeError:
raise RuntimeError(
'R package "gam" is necessary for computing gene expression trends. \
\nPlease install gam from https://cran.r-project.org/web/packages/gam/ and try again'
)

# Compute for all lineages if none are specified
if lineages is None:
lineages = pr_res.branch_probs.columns
Expand Down Expand Up @@ -146,12 +122,44 @@ def compute_gene_trends(pr_res, gene_exprs, lineages=None, n_jobs=-1):
results[branch]["trends"].loc[gene, :] = res[i][0]
results[branch]["std"].loc[gene, :] = res[i][1]
end = time.time()
print("Time for processing {}: {} minutes".format(branch, (end - start) / 60))
print("Time for processing {}: {} minutes".format(
branch, (end - start) / 60))

return results


def _gam_fit_predict(x, y, weights=None, pred_x=None):
    """Fit a single-term spline GAM of ``y`` on ``x`` and predict at ``pred_x``

    :param x: Predictor values, indexable by an array of integer positions
    :param y: Response values, aligned with ``x``
    :param weights: Per-observation weights; only observations with a
        strictly positive weight are used for the fit. Defaults to all ones
    :param pred_x: Points at which to predict. Defaults to ``x``
    :return: Tuple of (predictions at ``pred_x``, standard deviations at
        ``pred_x``)
    """
    # Fall back to uniform weights and to predicting at the training points
    if weights is None:
        weights = np.repeat(1.0, len(x))
    if pred_x is None:
        pred_x = x

    # Only observations carrying positive weight take part in the fit
    keep = np.where(weights > 0)[0]

    # One spline term on feature 0: 4 basis functions of order 2
    spline_term = s(0, n_splines=4, spline_order=2)
    model = LinearGAM(spline_term).fit(
        x[keep], y[keep], weights=weights[keep]
    )

    # Predictions at the requested points
    y_pred = model.predict(pred_x)

    # Residual standard error over the fitted observations
    # (denominator is n - 2, so n must exceed 2 for a finite value)
    n = len(keep)
    residuals = y[keep] - model.predict(x[keep])
    sigma = np.sqrt((residuals ** 2).sum() / (n - 2))

    # NOTE: the centre and spread below are taken over all of x, not only
    # the positively weighted subset used for the fit
    x_centre = np.mean(x)
    spread = ((x - x_centre) ** 2).sum()
    stds = np.sqrt(1 + 1 / n + (pred_x - x_centre) ** 2 / spread) * sigma / 2

    return y_pred, stds


def _gam_fit_predict_rpy2(x, y, weights=None, pred_x=None):

import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri, Formula
Expand All @@ -171,27 +179,31 @@ def _gam_fit_predict(x, y, weights=None, pred_x=None):

# Fit the model
rgam = importr("gam")
model = rgam.gam(Formula("y~s(x)"), data=r_df, weights=pd.Series(weights[use_inds]))
model = rgam.gam(Formula("y~s(x)"), data=r_df,
weights=pd.Series(weights[use_inds]))

# Predictions
if pred_x is None:
pred_x = x
y_pred = np.array(
robjects.r.predict(
model, newdata=pandas2ri.py2rpy(pd.DataFrame(pred_x, columns=["x"]))
model, newdata=pandas2ri.py2rpy(
pd.DataFrame(pred_x, columns=["x"]))
)
)

# Standard deviations
p = np.array(
robjects.r.predict(
model, newdata=pandas2ri.py2rpy(pd.DataFrame(x[use_inds], columns=["x"]))
model, newdata=pandas2ri.py2rpy(
pd.DataFrame(x[use_inds], columns=["x"]))
)
)
n = len(use_inds)
sigma = np.sqrt(((y[use_inds] - p) ** 2).sum() / (n - 2))
stds = (
np.sqrt(1 + 1 / n + (pred_x - np.mean(x)) ** 2 / ((x - np.mean(x)) ** 2).sum())
np.sqrt(1 + 1 / n + (pred_x - np.mean(x)) **
2 / ((x - np.mean(x)) ** 2).sum())
* sigma
/ 2
)
Expand Down
2 changes: 1 addition & 1 deletion src/palantir/version.py
@@ -1,3 +1,3 @@
__version__ = "1.0.1"
__version__ = "1.1"
__author__ = "Manu Setty"
__author_email__ = "manu.talanki@gmail.com"

0 comments on commit faf2f98

Please sign in to comment.