Merge pull request #18 from saezlab/feature

v0.1.9
saezlab · Jun 6, 2023 · 674bfb1 · 674bfb1
2 parents 11156dd + d02f74a
commit 674bfb1
Show file tree

Hide file tree

Showing 29 changed files with 2,393 additions and 1,943 deletions.
diff --git a/README.md b/README.md
@@ -16,21 +16,17 @@ This is its faster and memory efficient Python implementation, an R version is a
 
 - [LIANA's basic tutorial](https://liana-py.readthedocs.io/en/latest/notebooks/basic_usage.html) in dissociated single-cell data
 
-- [LIANA with cell2cell-Tensor](https://liana-py.readthedocs.io/en/latest/notebooks/liana_c2c.html) to obtain intercellular communication programmes across samples and conditions
-
 - [LIANA with MOFA](https://liana-py.readthedocs.io/en/latest/notebooks/mofatalk.html). Using MOFA to infer intercellular communication programmes across samples and conditions, as initially proposed by cell2cell-Tensor
 
 - [Multicellular programmes with MOFA](https://liana-py.readthedocs.io/en/latest/notebooks/mofacellular.html). Using MOFA to obtain coordinated
 gene expression programmes across samples and conditions, as done in [Ramirez et al., 2023](https://europepmc.org/article/ppr/ppr620471)
 
+- [LIANA with cell2cell-Tensor](https://liana-py.readthedocs.io/en/latest/notebooks/liana_c2c.html) to extract intercellular communication programmes across samples and conditions. Extensive tutorials combining LIANA & [cell2cell-Tensor](https://www.nature.com/articles/s41467-022-31369-2) are available [here](https://ccc-protocols.readthedocs.io/en/latest/index.html).
 
+- We also refer users to the [Cell-cell communication chapter](https://www.sc-best-practices.org/mechanisms/cell_cell_communication.html) in the [best-practices guide from Theis lab](https://www.nature.com/articles/s41576-023-00586-w). There we provide an overview of the common limitations and assumptions in CCC inference from (dissociated single-cell) transcriptomics data.
 
 For further information please check LIANA's [API documentation](https://liana-py.readthedocs.io/en/latest/api.html).
 
-
-We also refer users to the [Cell-cell communication chapter](https://www.sc-best-practices.org/mechanisms/cell_cell_communication.html) in the best-practices book from Theis lab, as it provides an overview of the common limitations and assumptions in CCC inference from (dissociated single-cell) transcriptomics data.
-
-
 ## Install LIANA
 
 Install liana's stable version:

diff --git a/docs/source/notebooks/basic_usage.ipynb b/docs/source/notebooks/basic_usage.ipynb
diff --git a/docs/source/notebooks/liana_c2c.ipynb b/docs/source/notebooks/liana_c2c.ipynb
diff --git a/docs/source/notebooks/mofacellular.ipynb b/docs/source/notebooks/mofacellular.ipynb
diff --git a/docs/source/notebooks/mofatalk.ipynb b/docs/source/notebooks/mofatalk.ipynb
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -1,6 +1,31 @@
 Release notes
 =============
 
+0.1.9 (06.06.2023)
+------------------
+
+- Fixed issues with deprecated params of pandas.DataFrame.to_csv & .assert_frame_equal in tests
+
+- `multi.get_variable_loadings` will now return all factors
+
+- Added source & target params to `fun.generate_lr_geneset`
+
+- Refactored `sc._Method._get_means_perms` & related scoring functions to be more efficient.
+ `None` can now be passed to n_perms to avoid permutations - these are only relevant if specificity is assumed to be relevant.
+
+- LIANA's aggregate method can now be customized to include any method of choice (added an example to basic_usage).
+
+- Removed 'Steady' aggregation from rank_aggregate
+
+- Changed deprecated np.float to np.float32 in `liana_pipe`, relevant for CellChat `mat_max`.
+
+- Method results will now be ordered by magnitude if available; otherwise, specificity is used.
+
+- Added `ligand_complex` and `receptor_complex` filtering to liana's dotplot
+
+- MOFAcellular will now work only with decoupler>=1.4.0 which implements edgeR-like filtering for the views.
+
+
 0.1.8 (24.03.2023)
 ------------------
 

diff --git a/liana/__init__.py b/liana/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '0.1.8'
+__version__ = '0.1.9'
 __version_info__ = tuple([int(num) for num in __version__.split('.')])
 
 from liana import method as mt, plotting as pl, resource as rs, multi as mu, funcomics as fun, testing

diff --git a/liana/funcomics/generate_lr_geneset.py b/liana/funcomics/generate_lr_geneset.py
@@ -47,10 +47,11 @@ def _sign_coherent_mean(x):
 
 def generate_lr_geneset(resource,
                         net, 
-                        ligand='ligand',
-                        receptor='receptor',
-                        lr_separator='&',
-                        source='source', 
+                        ligand_key='ligand',
+                        receptor_key='receptor',
+                        lr_separator='^',
+                        source='source',
+                        target='target',
                         weight='weight'):
     """
     Generate a ligand-receptor gene set from a resource and a network.
@@ -75,6 +76,7 @@ def generate_lr_geneset(resource,
     source : str, optional
         Name of the source column in the network, by default 'source'
     weight : str, optional
+        Name of the weight column in the network, by default 'weight'. If None, all weights are set to 1.
     
     Returns
     -------
@@ -83,31 +85,33 @@ def generate_lr_geneset(resource,
     - weight: mean weight of the interaction
     - source: source of the interaction
     """
+    if weight is None:
+        weight = 'weight'
+        net[weight] = 1
 
     # supp keys
-    ligand_weight = ligand + '_' + weight
-    receptor_weight = receptor + '_' + weight
-    ligand_source = ligand + '_' + source
-    receptor_source = receptor + '_' + source
-
+    ligand_weight = ligand_key + '_' + weight
+    receptor_weight = receptor_key + '_' + weight
+    ligand_source = ligand_key + '_' + source
+    receptor_source = receptor_key + '_' + source
 
     # assign weights to each entity
-    ligand_weights = _assign_entity_weights(resource, net, entity_key=ligand)
+    ligand_weights = _assign_entity_weights(resource, net, source=source, target=target, entity_key=ligand_key)
     ligand_weights.rename(columns={weight: ligand_weight, source:ligand_source}, inplace=True)
-    receptor_weights = _assign_entity_weights(resource, net, entity_key=receptor)
+    receptor_weights = _assign_entity_weights(resource, net, source=source, target=target, entity_key=receptor_key)
     receptor_weights.rename(columns={weight: receptor_weight, source: receptor_source}, inplace=True)
 
     # join weights to the ligand-receptor resource
-    resource = resource.merge(ligand_weights, on=ligand, how='inner')
-    resource = resource.merge(receptor_weights, on=receptor, how='inner')
+    resource = resource.merge(ligand_weights, on=ligand_key, how='inner')
+    resource = resource.merge(receptor_weights, on=receptor_key, how='inner')
 
     # keep only coherent ligand and receptor sources
     resource = resource[resource[ligand_source] == resource[receptor_source]]
     # mean of sign-coherent ligand-receptor weights
     resource.loc[:, weight] = resource.apply(lambda x: _sign_coherent_mean(np.array([x[ligand_weight], x[receptor_weight]])), axis=1)
 
     # unite ligand-receptor columns
-    resource = resource.assign(interaction = lambda x: x[ligand] + lr_separator + x[receptor])
+    resource = resource.assign(interaction = lambda x: x[ligand_key] + lr_separator + x[receptor_key])
 
     # keep only relevant columns
     resource = resource[[ligand_source, 'interaction', weight]].rename(columns={ligand_source: source})

diff --git a/liana/method/_Method.py b/liana/method/_Method.py
@@ -224,7 +224,8 @@ def __call__(self,
             Verbosity flag
         n_perms
             Number of permutations for the permutation test. Note that this is relevant
-            only for permutation-based methods - e.g. `CellPhoneDB`
+            only for permutation-based methods - e.g. `CellPhoneDB`. If `None` is passed, 
+            no permutation testing is performed.
         seed
             Random seed for reproducibility.
         resource

diff --git a/liana/method/__init__.py b/liana/method/__init__.py
@@ -1,12 +1,12 @@
 from ._Method import Method, MethodMeta, _show_methods
-from .sc._rank_aggregate import AggregateClass, _rank_aggregate_meta
+from .sc._rank_aggregate import AggregateClass, _rank_aggregate_meta as aggregate_meta
 from .sc import cellphonedb, connectome, logfc, natmi, singlecellsignalr, geometric_mean, cellchat
 
 import numpy as np
 
 # callable consensus instance
 _methods = [cellphonedb, connectome, logfc, natmi, singlecellsignalr, cellchat]
-rank_aggregate = AggregateClass(_rank_aggregate_meta, methods=_methods)
+rank_aggregate = AggregateClass(aggregate_meta, methods=_methods)
 
 
 def show_methods():
@@ -15,10 +15,13 @@ def show_methods():
 
 def get_method_scores():
     """Returns a dict of all scoring functions, with a boolean indicating whether the score is ascending or not"""
+
     instances = np.array(MethodMeta.instances)
     relevant = np.array([(isinstance(instance, Method)) | (isinstance(instance, AggregateClass)) for instance in instances])
     instances = instances[relevant]
+
     specificity_scores = {method.specificity: method.specificity_ascending for method in instances if method.specificity is not None}
     magnitude_scores = {method.magnitude : method.magnitude_ascending for method in instances if method.magnitude is not None}
+
     scores = {**specificity_scores, **magnitude_scores}
     return scores
diff --git a/liana/method/_liana_pipe.py b/liana/method/_liana_pipe.py
@@ -6,7 +6,7 @@
 from liana.method._pipe_utils import prep_check_adata, assert_covered, filter_resource, \
     filter_reassemble_complexes
 from ..resource import select_resource, explode_complexes
-from liana.method._pipe_utils._get_mean_perms import _get_means_perms
+from liana.method._pipe_utils._get_mean_perms import _get_means_perms, _get_mat_idx
 from liana.method._pipe_utils._aggregate import _aggregate
 
 import scanpy as sc
@@ -34,7 +34,7 @@ def liana_pipe(adata: anndata.AnnData,
                _score=None,
                _methods: list = None,
                _consensus_opts: list = None,
-               _aggregate_method: str = None
+               _aggregate_method: str | None = None
                ):
     """
     Parameters
@@ -106,6 +106,9 @@ def liana_pipe(adata: anndata.AnnData,
                      'ligand_trimean', 'receptor_trimean',
                      'mat_mean', 'mat_max',
                      ]
+
+    if n_perms is None:
+        _consensus_opts = 'Magnitude'
 
     if supp_columns is None:
         supp_columns = []
@@ -157,14 +160,15 @@ def liana_pipe(adata: anndata.AnnData,
     adata = adata[:, np.intersect1d(entities, adata.var.index)]
 
     # Get lr results
-    lr_res = _get_lr(adata=adata, 
+    lr_res = _get_lr(adata=adata,
                      resource=resource,
-                     mat_mean=mat_mean, 
+                     mat_mean=mat_mean,
                      mat_max=mat_max,
                      relevant_cols=_key_cols + _add_cols + _complex_cols,
                      de_method=de_method,
-                     base=base, 
-                     verbose=verbose)
+                     base=base,
+                     verbose=verbose
+                     )
 
     # Mean Sums required for NATMI (note done on subunits also)
     if 'ligand_means_sums' in _add_cols:
@@ -203,22 +207,35 @@ def liana_pipe(adata: anndata.AnnData,
                 lr_res = _aggregate(lrs,
                                     consensus=_score,
                                     aggregate_method=_aggregate_method,
-                                    _key_cols=_key_cols)
+                                    _key_cols=_key_cols,
+                                    _consensus_opts=_consensus_opts,
+                                    )
             else:  # Return by method results as they are
                 return lrs
         else:  # Run the specific method in mind
-            lr_res = _run_method(lr_res=lr_res, adata=adata, expr_prop=expr_prop,
-                                 _score=_score, _key_cols=_key_cols, _complex_cols=_complex_cols,
-                                 _add_cols=_add_cols, n_perms=n_perms,
+            lr_res = _run_method(lr_res=lr_res,
+                                 adata=adata,
+                                 expr_prop=expr_prop,
+                                 _score=_score, _key_cols=_key_cols,
+                                 _complex_cols=_complex_cols,
+                                 _add_cols=_add_cols,
+                                 n_perms=n_perms,
                                  return_all_lrs=return_all_lrs,
-                                 verbose=verbose, seed=seed)
+                                 verbose=verbose,
+                                 seed=seed)
     else:  # Just return lr_res
         lr_res = filter_reassemble_complexes(lr_res=lr_res,
                                              _key_cols=_key_cols,
                                              expr_prop=expr_prop,
                                              complex_cols=_complex_cols,
                                              return_all_lrs=return_all_lrs)
 
+    if _score is not None:
+        orderby, ascending =  (_score.magnitude, _score.magnitude_ascending) if _score.magnitude is not None \
+            else (_score.specificity, _score.specificity_ascending)
+
+        lr_res = lr_res.sort_values(by=orderby, ascending=ascending)
+
     return lr_res
 
 
@@ -355,6 +372,7 @@ def _get_lr(adata, resource, relevant_cols, mat_mean, mat_max, de_method, base,
         assert isinstance(mat_mean, np.float32)
         lr_res['mat_mean'] = mat_mean
 
+    # NOTE: this is not needed
     if isinstance(mat_max, np.float32):
         lr_res['mat_max'] = mat_max
 
@@ -462,21 +480,34 @@ def _run_method(lr_res: pandas.DataFrame,
         agg_fun = np.mean
 
     if _score.permute:
-        perms, ligand_pos, receptor_pos, labels_pos = \
-            _get_means_perms(adata=adata,
-                             lr_res=lr_res,
-                             n_perms=n_perms,
-                             seed=seed,
-                             agg_fun=agg_fun,
-                             norm_factor=norm_factor,
-                             verbose=verbose)
-        lr_res[[_score.magnitude, _score.specificity]] = \
-            lr_res.apply(_score.fun, axis=1, result_type="expand",
-                         perms=perms, ligand_pos=ligand_pos,
-                         receptor_pos=receptor_pos, labels_pos=labels_pos)
+        # get permutations
+        if n_perms is not None:
+            perms = _get_means_perms(adata=adata,
+                                     n_perms=n_perms,
+                                     seed=seed,
+                                     agg_fun=agg_fun,
+                                     norm_factor=norm_factor,
+                                     verbose=verbose)
+            # get tensor indexes for ligand, receptor, source, target
+            ligand_idx, receptor_idx, source_idx, target_idx = _get_mat_idx(adata, lr_res)
+
+            # ligand and receptor perms
+            ligand_stat_perms = perms[:, source_idx, ligand_idx]
+            receptor_stat_perms = perms[:, target_idx, receptor_idx]
+            # stack them together
+            perm_stats = np.stack((ligand_stat_perms, receptor_stat_perms), axis=0)
+        else:
+            perm_stats = None
+            _score.specificity = None
+
+        scores = _score.fun(x=lr_res,
+                            perm_stats=perm_stats)
     else:  # non-perm funs
-        lr_res[[_score.magnitude, _score.specificity]] = \
-            lr_res.apply(_score.fun, axis=1, result_type="expand")
+        scores = _score.fun(x=lr_res)
+
+    lr_res.loc[:, _score.magnitude] = scores[0]
+    lr_res.loc[:, _score.specificity] = scores[1]
+
 
     if return_all_lrs:
         # re-append rest of results