Skip to content

Commit

Permalink
fix: plotting with no model in nonpareil (#2536)
Browse files Browse the repository at this point in the history
<!-- Ensure that the PR title follows conventional commit style (<type>:
<description>)-->
<!-- Possible types are here:
https://github.com/commitizen/conventional-commit-types/blob/master/index.json
-->

<!-- Add a description of your PR here-->
Fix plotting with no model and added more tests.

### QC
<!-- Make sure that you can tick the boxes below. -->

* [x] I confirm that:

For all wrappers added by this PR, 

* there is a test case which covers any introduced changes,
* `input:` and `output:` file paths in the resulting rule can be changed
arbitrarily,
* either the wrapper can only use a single core, or the example rule
contains a `threads: x` statement with `x` being a reasonable default,
* rule names in the test case are in
[snake_case](https://en.wikipedia.org/wiki/Snake_case) and somehow tell
what the rule is about or match the tools purpose or name (e.g.,
`map_reads` for a step that maps reads),
* all `environment.yaml` specifications follow [the respective best
practices](https://stackoverflow.com/a/64594513/2352071),
* the `environment.yaml` pinning has been updated by running
`snakedeploy pin-conda-envs environment.yaml` on a linux machine,
* wherever possible, command line arguments are inferred and set
automatically (e.g. based on file extensions in `input:` or `output:`),
* all fields of the example rules in the `Snakefile`s and their entries
are explained via comments (`input:`/`output:`/`params:` etc.),
* `stderr` and/or `stdout` are logged correctly (`log:`), depending on
the wrapped tool,
* temporary files are either written to a unique hidden folder in the
working directory, or (better) stored where the Python function
`tempfile.gettempdir()` points to (see
[here](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir);
this also means that using any Python `tempfile` default behavior
works),
* the `meta.yaml` contains a link to the documentation of the respective
tool or command,
* `Snakefile`s pass the linting (`snakemake --lint`),
* `Snakefile`s are formatted with
[snakefmt](https://github.com/snakemake/snakefmt),
* Python wrapper scripts are formatted with
[black](https://black.readthedocs.io).
* Conda environments use a minimal amount of channels, in recommended
ordering. E.g. for bioconda, use (conda-forge, bioconda, nodefaults, as
conda-forge should have highest priority and defaults channels are
usually not needed because most packages are in conda-forge nowadays).
  • Loading branch information
fgvieira committed Jan 19, 2024
1 parent 4494a3b commit 1ac6350
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 33 deletions.
4 changes: 2 additions & 2 deletions bio/nonpareil/plot/test/Snakefile
Expand Up @@ -26,15 +26,15 @@ rule test_nonpareil_plot:

use rule test_nonpareil_plot as test_nonpareil_plot_multiple with:
input:
npo=["a.npo", "b.npo"],
npo=["a.npo", "b.npo", "c.npo"],
output:
pdf="results/samples.pdf",
model="results/samples.RData",
json="results/samples.json",
log:
"logs/samples.log",
params:
labels=["Model A","Model B"],
labels=["Model A","Model B", "Model C"],
col=["blue","red"],
enforce_consistency=True,
star=95,
Expand Down
34 changes: 34 additions & 0 deletions bio/nonpareil/plot/test/c.npo
@@ -0,0 +1,34 @@
# @impl: Nonpareil
# @ksize: 24
# @version: 3.40
# @L: 55.738
# @AL: 32.738
# @R: 8554
# @overlap: 50.00
# @divide: 0.70
0.000000 0.00000 0.00000 0.00000 0.00000 0.00000
0.000192 0.00000 0.00000 0.00000 0.00000 0.00000
0.000274 0.00000 0.00000 0.00000 0.00000 0.00000
0.000391 0.00000 0.00000 0.00000 0.00000 0.00000
0.000559 0.00000 0.00000 0.00000 0.00000 0.00000
0.000798 0.00000 0.00000 0.00000 0.00000 0.00000
0.001140 0.00000 0.00000 0.00000 0.00000 0.00000
0.001628 0.00000 0.00000 0.00000 0.00000 0.00000
0.002326 0.00000 0.00000 0.00000 0.00000 0.00000
0.003323 0.00020 0.00625 0.00000 0.00000 0.00000
0.004748 0.00037 0.00873 0.00000 0.00000 0.00000
0.006782 0.00020 0.00625 0.00000 0.00000 0.00000
0.009689 0.00039 0.00719 0.00000 0.00000 0.00000
0.013841 0.00141 0.01088 0.00000 0.00000 0.00000
0.019773 0.00205 0.01122 0.00000 0.00000 0.00000
0.028248 0.00302 0.01141 0.00000 0.00000 0.00000
0.040354 0.00286 0.00933 0.00000 0.00000 0.00000
0.057648 0.00374 0.00865 0.00000 0.00000 0.00000
0.082354 0.00408 0.00773 0.00000 0.00000 0.00000
0.117649 0.00500 0.00689 0.00000 0.00000 0.01010
0.168070 0.00609 0.00612 0.00000 0.00641 0.00794
0.240100 0.00683 0.00540 0.00457 0.00521 0.01015
0.343000 0.00782 0.00458 0.00362 0.00683 0.01064
0.490000 0.00912 0.00371 0.00716 0.00957 0.01193
0.700000 0.01023 0.00280 0.00847 0.01024 0.01209
1.000000 0.01154 0.00180 0.01080 0.01200 0.01321
52 changes: 23 additions & 29 deletions bio/nonpareil/plot/wrapper.R
Expand Up @@ -22,9 +22,6 @@ in_files = snakemake@input[["npo"]]
out_pdf = snakemake@output[[1]]
base::message("Input files: ")
base::print(in_files)
base::message("Saving plot to file: ")
base::print(out_pdf)


# Set parameters
params <- list("in_files" = in_files,
Expand Down Expand Up @@ -65,15 +62,15 @@ utils::str(params)
##################
pdf(out_pdf)
curves <- Nonpareil.set(in_files,
labels = params[["labels"]],
col = params[["col"]],
enforce.consistency = params[["enforce_consistency"]],
star = params[["star"]],
correction.factor = params[["correction_factor"]],
weights.exp = params[["weights_exp"]],
skip.model = params[["skip_model"]],
plot = FALSE
)
labels = params[["labels"]],
col = params[["col"]],
enforce.consistency = params[["enforce_consistency"]],
star = params[["star"]],
correction.factor = params[["correction_factor"]],
weights.exp = params[["weights_exp"]],
skip.model = params[["skip_model"]],
plot = FALSE
)

lapply(curves$np.curves, plot,
col = params[["col"]],
Expand All @@ -93,16 +90,6 @@ base::message("Nonpareil plot saved")
stats <- summary(curves)
# Fix names
colnames(stats) <- c("Redundancy", "Avg. coverage", "Seq. effort", "Model correlation", "Required seq. effort", "Diversity")

# If model not infered, set its values to NA
stats[,4] <- sapply(stats[,4], function(x){if(length(x) == 0){NA} else {x}})
stats[,5] <- sapply(stats[,5], function(x){if(length(x) == 0){NA} else {x}})

# Convert to Gb
stats[,3] <- stats[,3] / 1e9
stats[,5] <- stats[,5] / 1e9
# Round
stats <- round(stats, digits = 2)
# Print stats to log
base::print(stats)

Expand Down Expand Up @@ -133,14 +120,21 @@ if ("json" %in% base::names(snakemake@output)) {
n <- names(attributes(object))[13:20]
y <- lapply(n, function(v) attr(object,v))
names(y) <- n
# Add model
# https://github.com/lmrodriguezr/nonpareil/blob/162f1697ab1a21128e1857dd87fa93011e30c1ba/utils/Nonpareil/R/Nonpareil.R#L330-L332
x_min <- 1e3
x_max <- signif(tail(attr(curves$np.curves[[1]],"x.adj"), n=1)*10, 1)
x.model <- exp(seq(log(x_min), log(x_max), length.out=1e3))
y.model <- predict(object, lr=x.model)
curve_json <- c(x, y)

c(x, y, list(x.model=x.model), list(y.model=y.model))
# Add model
if (object$has.model) {
# https://github.com/lmrodriguezr/nonpareil/blob/162f1697ab1a21128e1857dd87fa93011e30c1ba/utils/Nonpareil/R/Nonpareil.R#L330-L332
x_min <- 1e3
x_max <- signif(tail(attr(object,"x.adj"), n=1)*10, 1)
x.model <- exp(seq(log(x_min), log(x_max), length.out=1e3))
y.model <- predict(object, lr=x.model)
curve_json <- append(curve_json, list(x.model=x.model))
curve_json <- append(curve_json, list(y.model=y.model))
}

base::print(curve_json)
curve_json
}

export_set <- function(object){
Expand Down
8 changes: 6 additions & 2 deletions test.py
Expand Up @@ -188,8 +188,12 @@ def test_nonpareil_plot():
"--use-conda",
"-F",
"results/a.pdf",
# Test disabled due to bug in nonpareil (#62)
# "results/a.nomodel.pdf",
"results/b.pdf",
"results/c.pdf",
"results/a.nomodel.pdf",
"results/b.nomodel.pdf",
"results/c.nomodel.pdf",
"results/samples.pdf",
],
)

Expand Down

0 comments on commit 1ac6350

Please sign in to comment.