open2c · Phlya · Oct 27, 2023 · Oct 31, 2023 · Nov 7, 2023 · Nov 7, 2023
diff --git a/cooltools/api/expected.py b/cooltools/api/expected.py
@@ -302,7 +302,9 @@ def make_diag_tables(clr, regions, regions2=None, clr_weight_name="weight"):
             regions2 = bioframe.make_viewframe(
                 regions2, check_bounds=clr.chromsizes
             ).to_numpy()
-    except ValueError:  # If there are non-unique entries in regions1/2, possible only for asymmetric expected:
+    except (
+        ValueError
+    ):  # If there are non-unique entries in regions1/2, possible only for asymmetric expected:
         regions = pd.concat(
             [
                 bioframe.make_viewframe([region], check_bounds=clr.chromsizes)
@@ -1017,25 +1019,43 @@ def expected_cis(
 
     # additional smoothing and aggregating options would add columns only, not replace them
     if smooth:
+        if clr_weight_name is None:
+            # result["count.avg"] = result["count.sum"] / result["n_valid"]
+            cols = {
+                "dist": "dist",
+                "n_pixels": _NUM_VALID,
+                "n_contacts": "count.sum",
+                "contact_freq": "count.avg",
+                "smooth_suffix": ".smoothed",
+            }
+        else:
+            cols = expected_smoothing.DEFAULT_CVD_COLS
+
         result_smooth = expected_smoothing.agg_smooth_cvd(
-            result,
-            sigma_log10=smooth_sigma,
+            result, sigma_log10=smooth_sigma, cols=cols
         )
         # add smoothed columns to the result (only balanced for now)
         result = result.merge(
-            result_smooth[["balanced.avg.smoothed", _DIST]],
+            result_smooth[[cols["contact_freq"] + cols["smooth_suffix"], _DIST]],
             on=[_REGION1, _REGION2, _DIST],
             how="left",
         )
         if aggregate_smoothed:
             result_smooth_agg = expected_smoothing.agg_smooth_cvd(
-                result,
-                groupby=None,
-                sigma_log10=smooth_sigma,
-            ).rename(columns={"balanced.avg.smoothed": "balanced.avg.smoothed.agg"})
+                result, groupby=None, sigma_log10=smooth_sigma, cols=cols
+            ).rename(
+                columns={
+                    cols["contact_freq"]
+                    + cols["smooth_suffix"]: cols["contact_freq"]
+                    + cols["smooth_suffix"]
+                    + ".agg"
+                }
+            )
             # add smoothed columns to the result
             result = result.merge(
-                result_smooth_agg[["balanced.avg.smoothed.agg", _DIST]],
+                result_smooth_agg[
+                    [cols["contact_freq"] + cols["smooth_suffix"] + ".agg", _DIST]
+                ],
                 on=[
                     _DIST,
                 ],

diff --git a/tests/test_expected.py b/tests/test_expected.py
@@ -476,6 +476,54 @@ def test_expected_smooth_cli(request, tmpdir):
             assert _delta_smooth_agg < 0.02
 
 
+def test_expected_smooth_nobalance_cli(request, tmpdir):
+    # CLI compute-expected for chrom-wide cis-data
+    in_cool = op.join(request.fspath.dirname, "data/CN.mm9.1000kb.cool")
+    out_cis_expected = op.join(tmpdir, "cis_unb.exp.tsv")
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "expected-cis",
+            "--smooth",
+            "--aggregate-smoothed",
+            "--clr-weight-name",
+            "",
+            "-o",
+            out_cis_expected,
+            in_cool,
+        ],
+    )
+    assert result.exit_code == 0
+    clr = cooler.Cooler(in_cool)
+    cis_expected = pd.read_table(out_cis_expected, sep="\t")
+    grouped = cis_expected.groupby(["region1", "region2"])
+    # full chromosomes in this example:
+    for (chrom1, chrom2), group in grouped:
+        assert chrom1 == chrom2
+        # work only on "large" crhomosomes, skip chrM and such
+        if chrom1 not in ["chrM", "chrY", "chrX"]:
+            # extract dense matrix and get desired expected:
+            matrix = clr.matrix(balance=False).fetch(chrom1)
+            desired_expected = np.where(
+                group["dist"] < ignore_diags,
+                np.nan,  # fill nan for ignored diags
+                _diagsum_symm_dense(matrix),
+            )
+            # do overall tolerance instead of element by element comparison
+            # because of non-matching NaNs and "noiseness" of the non-smoothed
+            # expected
+            _delta_smooth = np.nanmax(
+                np.abs(group["count.avg.smoothed"].to_numpy() - desired_expected)
+            )
+            _delta_smooth_agg = np.nanmax(
+                np.abs(group["count.avg.smoothed.agg"].to_numpy() - desired_expected)
+            )
+            # some made up tolerances, that work for this example
+            assert _delta_smooth < 2000
+            assert _delta_smooth_agg < 4000
+
+
 def test_trans_expected_view_cli(request, tmpdir):
     # CLI compute expected for cis-data with arbitrary view
     # which cannot overlap. But it is symmetrical cis-case.