fix: plotting with no model in nonpareil (#2536)

Fix plotting with no model and added more tests. ### QC  * [x] I confirm that: For all wrappers added by this PR, * there is a test case which covers any introduced changes, * `input:` and `output:` file paths in the resulting rule can be changed arbitrarily, * either the wrapper can only use a single core, or the example rule contains a `threads: x` statement with `x` being a reasonable default, * rule names in the test case are in [snake_case](https://en.wikipedia.org/wiki/Snake_case) and somehow tell what the rule is about or match the tools purpose or name (e.g., `map_reads` for a step that maps reads), * all `environment.yaml` specifications follow [the respective best practices](https://stackoverflow.com/a/64594513/2352071), * the `environment.yaml` pinning has been updated by running `snakedeploy pin-conda-envs environment.yaml` on a linux machine, * wherever possible, command line arguments are inferred and set automatically (e.g. based on file extensions in `input:` or `output:`), * all fields of the example rules in the `Snakefile`s and their entries are explained via comments (`input:`/`output:`/`params:` etc.), * `stderr` and/or `stdout` are logged correctly (`log:`), depending on the wrapped tool, * temporary files are either written to a unique hidden folder in the working directory, or (better) stored where the Python function `tempfile.gettempdir()` points to (see [here](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir); this also means that using any Python `tempfile` default behavior works), * the `meta.yaml` contains a link to the documentation of the respective tool or command, * `Snakefile`s pass the linting (`snakemake --lint`), * `Snakefile`s are formatted with [snakefmt](https://github.com/snakemake/snakefmt), * Python wrapper scripts are formatted with [black](https://black.readthedocs.io). * Conda environments use a minimal amount of channels, in recommended ordering. E.g. for bioconda, use (conda-forge, bioconda, nodefaults, as conda-forge should have highest priority and defaults channels are usually not needed because most packages are in conda-forge nowadays).
snakemake · Jan 19, 2024 · 1ac6350 · 1ac6350
1 parent 4494a3b
commit 1ac6350
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 33 deletions.
diff --git a/bio/nonpareil/plot/test/Snakefile b/bio/nonpareil/plot/test/Snakefile
@@ -26,15 +26,15 @@ rule test_nonpareil_plot:
 
 use rule test_nonpareil_plot as test_nonpareil_plot_multiple with:
     input:
-        npo=["a.npo", "b.npo"],
+        npo=["a.npo", "b.npo", "c.npo"],
     output:
         pdf="results/samples.pdf",
         model="results/samples.RData",
         json="results/samples.json",
     log:
         "logs/samples.log",
     params:
-        labels=["Model A","Model B"],
+        labels=["Model A","Model B", "Model C"],
         col=["blue","red"],
         enforce_consistency=True,
         star=95,

diff --git a/bio/nonpareil/plot/test/c.npo b/bio/nonpareil/plot/test/c.npo
@@ -0,0 +1,34 @@
+# @impl: Nonpareil
+# @ksize: 24
+# @version: 3.40
+# @L: 55.738
+# @AL: 32.738
+# @R: 8554
+# @overlap: 50.00
+# @divide: 0.70
+0.000000	0.00000	0.00000	0.00000	0.00000	0.00000
+0.000192	0.00000	0.00000	0.00000	0.00000	0.00000
+0.000274	0.00000	0.00000	0.00000	0.00000	0.00000
+0.000391	0.00000	0.00000	0.00000	0.00000	0.00000
+0.000559	0.00000	0.00000	0.00000	0.00000	0.00000
+0.000798	0.00000	0.00000	0.00000	0.00000	0.00000
+0.001140	0.00000	0.00000	0.00000	0.00000	0.00000
+0.001628	0.00000	0.00000	0.00000	0.00000	0.00000
+0.002326	0.00000	0.00000	0.00000	0.00000	0.00000
+0.003323	0.00020	0.00625	0.00000	0.00000	0.00000
+0.004748	0.00037	0.00873	0.00000	0.00000	0.00000
+0.006782	0.00020	0.00625	0.00000	0.00000	0.00000
+0.009689	0.00039	0.00719	0.00000	0.00000	0.00000
+0.013841	0.00141	0.01088	0.00000	0.00000	0.00000
+0.019773	0.00205	0.01122	0.00000	0.00000	0.00000
+0.028248	0.00302	0.01141	0.00000	0.00000	0.00000
+0.040354	0.00286	0.00933	0.00000	0.00000	0.00000
+0.057648	0.00374	0.00865	0.00000	0.00000	0.00000
+0.082354	0.00408	0.00773	0.00000	0.00000	0.00000
+0.117649	0.00500	0.00689	0.00000	0.00000	0.01010
+0.168070	0.00609	0.00612	0.00000	0.00641	0.00794
+0.240100	0.00683	0.00540	0.00457	0.00521	0.01015
+0.343000	0.00782	0.00458	0.00362	0.00683	0.01064
+0.490000	0.00912	0.00371	0.00716	0.00957	0.01193
+0.700000	0.01023	0.00280	0.00847	0.01024	0.01209
+1.000000	0.01154	0.00180	0.01080	0.01200	0.01321
diff --git a/bio/nonpareil/plot/wrapper.R b/bio/nonpareil/plot/wrapper.R
@@ -22,9 +22,6 @@ in_files = snakemake@input[["npo"]]
 out_pdf = snakemake@output[[1]]
 base::message("Input files: ")
 base::print(in_files)
-base::message("Saving plot to file: ")
-base::print(out_pdf)
-
 
 # Set parameters
 params <- list("in_files" = in_files,
@@ -65,15 +62,15 @@ utils::str(params)
 ##################
 pdf(out_pdf)
 curves <- Nonpareil.set(in_files,
-                                labels = params[["labels"]],
-                                col = params[["col"]],
-                                enforce.consistency = params[["enforce_consistency"]],
-                                star = params[["star"]],
-                                correction.factor = params[["correction_factor"]],
-                                weights.exp = params[["weights_exp"]],
-                                skip.model = params[["skip_model"]],
-                                plot = FALSE
-                                )
+                        labels = params[["labels"]],
+                        col = params[["col"]],
+                        enforce.consistency = params[["enforce_consistency"]],
+                        star = params[["star"]],
+                        correction.factor = params[["correction_factor"]],
+                        weights.exp = params[["weights_exp"]],
+                        skip.model = params[["skip_model"]],
+                        plot = FALSE
+                       )
 
 lapply(curves$np.curves, plot,
        col = params[["col"]],
@@ -93,16 +90,6 @@ base::message("Nonpareil plot saved")
 stats <- summary(curves)
 # Fix names
 colnames(stats) <- c("Redundancy", "Avg. coverage", "Seq. effort", "Model correlation", "Required seq. effort", "Diversity")
-
-# If model not infered, set its values to NA
-stats[,4] <- sapply(stats[,4], function(x){if(length(x) == 0){NA} else {x}})
-stats[,5] <- sapply(stats[,5], function(x){if(length(x) == 0){NA} else {x}})
-
-# Convert to Gb
-stats[,3] <- stats[,3] / 1e9
-stats[,5] <- stats[,5] / 1e9
-# Round
-stats <- round(stats, digits = 2)
 # Print stats to log
 base::print(stats)
 
@@ -133,14 +120,21 @@ if ("json" %in% base::names(snakemake@output)) {
     n <- names(attributes(object))[13:20]
     y <- lapply(n, function(v) attr(object,v))
     names(y) <- n
-    # Add model
-    # https://github.com/lmrodriguezr/nonpareil/blob/162f1697ab1a21128e1857dd87fa93011e30c1ba/utils/Nonpareil/R/Nonpareil.R#L330-L332
-    x_min <- 1e3
-    x_max <- signif(tail(attr(curves$np.curves[[1]],"x.adj"), n=1)*10, 1)
-    x.model <- exp(seq(log(x_min), log(x_max), length.out=1e3))
-    y.model <- predict(object, lr=x.model)
+    curve_json <- c(x, y)
 
-    c(x, y, list(x.model=x.model), list(y.model=y.model))
+    # Add model
+    if (object$has.model) {
+      # https://github.com/lmrodriguezr/nonpareil/blob/162f1697ab1a21128e1857dd87fa93011e30c1ba/utils/Nonpareil/R/Nonpareil.R#L330-L332
+      x_min <- 1e3
+      x_max <- signif(tail(attr(object,"x.adj"), n=1)*10, 1)
+      x.model <- exp(seq(log(x_min), log(x_max), length.out=1e3))
+      y.model <- predict(object, lr=x.model)
+      curve_json <- append(curve_json, list(x.model=x.model))
+      curve_json <- append(curve_json, list(y.model=y.model))
+    }
+
+    base::print(curve_json)
+    curve_json
   }
 
   export_set <- function(object){

diff --git a/test.py b/test.py
@@ -188,8 +188,12 @@ def test_nonpareil_plot():
             "--use-conda",
             "-F",
             "results/a.pdf",
-            # Test disabled due to bug in nonpareil (#62)
-            #            "results/a.nomodel.pdf",
+            "results/b.pdf",
+            "results/c.pdf",
+            "results/a.nomodel.pdf",
+            "results/b.nomodel.pdf",
+            "results/c.nomodel.pdf",
+            "results/samples.pdf",
         ],
     )