fix uneven columns and rows (#63)

* fix uneven columns and rows * Update plot_cpdb.py * ah my stupid regex pattern was the reason! * Update settings.py * Update settings.py * ok it was missing a colour * Update plot_cpdb_chord.py * Update plot_cpdb_chord.py * bump version * update tutorials. i guess everything is the same except this time the mnp's are kept because of the new regex pattern
zktuong · Feb 17, 2024 · 642bb98 · 642bb98
1 parent a6d8460
commit 642bb98
Show file tree

Hide file tree

Showing 6 changed files with 103 additions and 79 deletions.
diff --git a/docs/notebooks/tutorial.ipynb b/docs/notebooks/tutorial.ipynb
diff --git a/docs/notebooks/tutorial_v5.ipynb b/docs/notebooks/tutorial_v5.ipynb
@@ -53,7 +53,7 @@
     "import anndata as ad\n",
     "import pandas as pd\n",
     "import ktplotspy as kpy\n",
-    "import matplotlib.pyplot as plt\n"
+    "import matplotlib.pyplot as plt"
    ]
   },
   {
@@ -74,7 +74,7 @@
     "means = pd.read_csv(\"data/out_v5/degs_analysis_means_07_27_2023_151846.txt\", sep=\"\\t\")\n",
     "relevant_interactions = pd.read_csv(\"data/out_v5/degs_analysis_relevant_interactions_07_27_2023_151846.txt\", sep=\"\\t\")\n",
     "interaction_scores = pd.read_csv(\"data/out_v5/degs_analysis_interaction_scores_07_27_2023_151846.txt\", sep=\"\\t\")\n",
-    "cellsign = pd.read_csv(\"data/out_v5/degs_analysis_CellSign_active_interactions_07_27_2023_151846.txt\", sep=\"\\t\")\n"
+    "cellsign = pd.read_csv(\"data/out_v5/degs_analysis_CellSign_active_interactions_07_27_2023_151846.txt\", sep=\"\\t\")"
    ]
   },
   {
@@ -86,7 +86,7 @@
     {
      "data": {
       "text/plain": [
-       "<seaborn.matrix.ClusterGrid at 0x157053c10>"
+       "<seaborn.matrix.ClusterGrid at 0x17fe63dd0>"
       ]
      },
      "execution_count": 3,
@@ -105,7 +105,7 @@
     }
    ],
    "source": [
-    "kpy.plot_cpdb_heatmap(pvals=relevant_interactions, degs_analysis=True, figsize=(5, 5), title=\"Sum of significant interactions\")\n"
+    "kpy.plot_cpdb_heatmap(pvals=relevant_interactions, degs_analysis=True, figsize=(5, 5), title=\"Sum of significant interactions\")"
    ]
   },
   {
@@ -217,7 +217,7 @@
     "    standard_scale=True,\n",
     "    interaction_scores=interaction_scores,\n",
     "    scale_alpha_by_interaction_scores=True,\n",
-    ")\n"
+    ")"
    ]
   },
   {
@@ -398,7 +398,7 @@
     "    degs_analysis=True,\n",
     "    standard_scale=True,\n",
     "    cellsign=cellsign,\n",
-    "    filter_by_cellsign=True\n",
+    "    filter_by_cellsign=True,\n",
     ")"
    ]
   },

diff --git a/ktplotspy/plot/plot_cpdb.py b/ktplotspy/plot/plot_cpdb.py
@@ -34,7 +34,6 @@
     DEFAULT_SPEC_PAT,
     DEFAULT_CELLSIGN_ALPHA,
     DEFAULT_COLUMNS,
-    DEFAULT_CPDB_SEP,
 )
 from ktplotspy.utils.support import (
     ensure_categorical,
@@ -174,21 +173,31 @@ def plot_cpdb(
 
     if special_character_regex_pattern is None:
         special_character_regex_pattern = DEFAULT_SPEC_PAT
-    swapr = True if (cell_type1 == ".") or (cell_type2 == ".") else False
     # prepare data
     metadata = adata.obs.copy()
     means_mat = prep_table(data=means)
     pvals_mat = prep_table(data=pvals)
+    col_start = (
+        DEFAULT_V5_COL_START if pvals_mat.columns[DEFAULT_CLASS_COL] == "classification" else DEFAULT_COL_START
+    )  # in v5, there are 12 columns before the values
+    if pvals_mat.shape != means_mat.shape:
+        tmp_pvals_mat = pd.DataFrame(index=means_mat.index, columns=means_mat.columns)
+        # Copy the leading columns (those before col_start) from means_mat into tmp_pvals_mat
+        tmp_pvals_mat.iloc[:, :col_start] = means_mat.iloc[:, :col_start]
+        tmp_pvals_mat.update(pvals_mat)
+        if degs_analysis:
+            tmp_pvals_mat.fillna(0, inplace=True)
+        else:
+            tmp_pvals_mat.fillna(1, inplace=True)
+        pvals_mat = tmp_pvals_mat.copy()
+
     if (interaction_scores is not None) & (cellsign is not None):
         raise KeyError("Please specify either interaction scores or cellsign, not both.")
 
     if interaction_scores is not None:
         interaction_scores_mat = prep_table(data=interaction_scores)
     elif cellsign is not None:
         cellsign_mat = prep_table(data=cellsign)
-    col_start = (
-        DEFAULT_V5_COL_START if pvals_mat.columns[DEFAULT_CLASS_COL] == "classification" else DEFAULT_COL_START
-    )  # in v5, there are 12 columns before the values
     if degs_analysis:
         pvals_mat.iloc[:, col_start : pvals_mat.shape[1]] = 1 - pvals_mat.iloc[:, col_start : pvals_mat.shape[1]]
     # front load the dictionary construction here
@@ -259,10 +268,7 @@ def plot_cpdb(
         )
     cell_type = "|".join(celltype)
     # keep cell types
-    if swapr:
-        ct_columns = [ct for ct in means_mat.columns if re.search(ct, cell_type)]
-    else:
-        ct_columns = [ct for ct in means_mat.columns if re.search(cell_type, ct)]
+    ct_columns = [ct for ct in means_mat.columns if re.search(cell_type, ct)]
     # filter
     means_matx = filter_interaction_and_celltype(data=means_mat, genes=query, celltype_pairs=ct_columns)
     pvals_matx = filter_interaction_and_celltype(data=pvals_mat, genes=query, celltype_pairs=ct_columns)
@@ -384,7 +390,8 @@ def plot_cpdb(
         df["is_integrin"] = [is_int[i] for i in df.index]
         df["directionality"] = [direc[i] for i in df.index]
         df["classification"] = [classif[i] for i in df.index]
-
+    if df.shape[0] == 0:
+        raise ValueError("No significant results found.")
     if return_table:
         return df
     else:

diff --git a/ktplotspy/plot/plot_cpdb_chord.py b/ktplotspy/plot/plot_cpdb_chord.py
@@ -6,13 +6,12 @@
 import pandas as pd
 
 from collections import defaultdict
-from itertools import combinations
 from matplotlib.lines import Line2D
 from matplotlib.colors import LinearSegmentedColormap
 from pycircos import Garc, Gcircle
 from typing import Optional, Tuple, Dict, Union
 
-from ktplotspy.utils.settings import DEFAULT_SEP
+from ktplotspy.utils.settings import DEFAULT_SEP  # DEFAULT_PAL
 from ktplotspy.utils.support import celltype_fraction, celltype_means, find_complex, flatten, generate_df, present
 from ktplotspy.plot import plot_cpdb
 
@@ -260,7 +259,11 @@ def plot_cpdb_chord(
                 face_col_dict = dict(zip(list(set(adata.obs[celltype_key])), adata.uns[celltype_key + "_colors"]))
     for i, j in tmpdf.iterrows():
         name = j["producer"]
-        col = None if face_col_dict is None else face_col_dict[name]
+        if face_col_dict is None:
+            col = None
+        else:
+            # col = face_col_dict[name] if name in face_col_dict else next(DEFAULT_PAL) # cycle through the default palette
+            col = face_col_dict[name] if name in face_col_dict else "#e7e7e7"  # or just make them grey?
         arc = Garc(
             arc_id=name,
             size=size,

diff --git a/ktplotspy/utils/settings.py b/ktplotspy/utils/settings.py
@@ -1,16 +1,20 @@
 #!/usr/bin/env python
+# import matplotlib.pyplot as plt
 import pkg_resources
 
+# from itertools import cycle
+
 try:
     __version__ = pkg_resources.get_distribution("ktplotspy").version
 except:  # pragma: no cover
     __version__ = "dev"
 
 DEFAULT_SEP = ">@<"
-DEFAULT_SPEC_PAT = "/|:|\\?|\\*|\\+|\\|\\(|\\)|\\/"
+DEFAULT_SPEC_PAT = "/|:|\\?|\\*|\\+|\\(|\\)|\\/|\\[|\\]"
 DEFAULT_CELLSIGN_ALPHA = 0.5
 DEFAULT_COLUMNS = ["interaction_group", "celltype_group"]
 DEFAULT_V5_COL_START = 13
 DEFAULT_COL_START = 11
 DEFAULT_CLASS_COL = 12
 DEFAULT_CPDB_SEP = "|"
+# DEFAULT_PAL = cycle(plt.cm.tab20.colors)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ktplotspy"
-version = "0.2.2"
+version = "0.2.3"
 description = "Python library for plotting Cellphonedb results. Ported from ktplots R package."
 authors = ["Kelvin Tuong <26215587+zktuong@users.noreply.github.com>"]
 license = "MIT"