Skip to content

Commit

Permalink
Merge branch 'CW-3289/graceful-fail' into 'dev'
Browse files Browse the repository at this point in the history
Resolve CW-3289 "/graceful fail"

Closes CW-3289

See merge request epi2melabs/workflows/wf-bacterial-genomes!104
  • Loading branch information
Christopher Alder committed Feb 7, 2024
2 parents ad11e6e + c4f1541 commit 479d958
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 15 deletions.
9 changes: 5 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [unreleased]
### Fixed
- The workflow requesting too little memory for some processes.

## [v1.1.0]
### Added
- Sample results aggregated into `results.json`
- `flye_genome_size` and `flye_asm_coverage` parameters for controlling the initial downsampling step before the de novo assembly
Expand All @@ -17,6 +14,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Some formatting in github issue template.
- Retry and memory bump if de novo assembly fails first time

### Fixed
- Workflow now runs to completion when a sample fails the de novo assembly
- The workflow requesting too little memory for some processes.

## [v1.0.2]
### Added
- FASTA now includes basecaller model in headers
Expand Down
4 changes: 2 additions & 2 deletions bin/workflow_glue/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def create_report(args):
if samples_flye_failed:
html_raw(
f"""
<b>Info:</b> Due to too low coverage, Flye failed to produce an
<b>Info:</b> Flye failed to produce an
assembly for the following samples:
<b>{", ".join(samples_flye_failed)}</b>.<br>
Expand All @@ -316,7 +316,7 @@ def create_report(args):
# not a single sample produced a valid assembly
html_raw(
"""
<b>Warning:</b> Due to too low coverage, Flye failed to produce an
<b>Warning:</b> Flye failed to produce an
assembly for any of the samples. There are therefore no more results
to report.
"""
Expand Down
18 changes: 10 additions & 8 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ OPTIONAL_FILE = file("$projectDir/data/OPTIONAL_FILE")
FLYE_MIN_COVERAGE_THRESHOLD = 5



process readStats {
label "wfbacterialgenomes"
cpus 1
Expand Down Expand Up @@ -74,15 +73,15 @@ process deNovo {
path("${meta.alias}.draft_assembly.fasta.gz"),
path("${meta.alias}_flye_stats.tsv"),
optional: true, emit: asm
tuple val(meta), env(LOW_COV_FAIL), emit: failed
tuple val(meta), env(COV_FAIL), emit: failed
script:
// flye may fail due to low coverage; in this case we don't want to cause the whole
// workflow to crash --> exit with `0` and don't emit output files
def flye_opts = params.flye_opts ?: ""
def genome_size = params.flye_genome_size ? "--genome-size " + params.flye_genome_size : ""
def asm_coverage = params.flye_asm_coverage ? "--asm-coverage " + params.flye_asm_coverage : ""
"""
LOW_COV_FAIL=0
COV_FAIL=0
FLYE_EXIT_CODE=0
flye $flye_opts $genome_size $asm_coverage --nano-hq reads.fastq.gz --out-dir output --threads "${task.cpus}" || \
FLYE_EXIT_CODE=\$?
Expand All @@ -92,7 +91,7 @@ process deNovo {
mv output/assembly_info.txt "./${meta.alias}_flye_stats.tsv"
bgzip "${meta.alias}.draft_assembly.fasta"
else
# flye failed --> check the log to see if low coverage caused the failure
# flye failed --> check the log to find out why
edge_cov=\$(
grep -oP 'Mean edge coverage: \\K\\d+' output/flye.log \
|| echo $FLYE_MIN_COVERAGE_THRESHOLD
Expand All @@ -107,7 +106,10 @@ process deNovo {
]]; then
echo -n "Caught Flye failure due to low coverage (either mean edge cov. or "
echo "overlap-based cov. were below $FLYE_MIN_COVERAGE_THRESHOLD)".
LOW_COV_FAIL=1
COV_FAIL=1
elif grep -q "No disjointigs were assembled" output/flye.log; then
echo -n "Caught Flye failure due to disjointig assembly."
COV_FAIL=2
else
# exit a subshell with error so that the process fails
( exit \$FLYE_EXIT_CODE )
Expand Down Expand Up @@ -564,8 +566,7 @@ workflow calling_pipeline {
input_reads.reads | map { meta, reads -> [ meta, "complete" ] }
| mix (input_reads.no_reads | map { meta, reads -> [ meta, "not-met" ] } )
)



sample_ids = reads.map { meta, reads, stats -> meta.alias }
metadata = reads.map { meta, reads, stats -> meta } | toList()
definitions = projectDir.resolve("./output_definition.json").toString()
Expand All @@ -580,14 +581,15 @@ workflow calling_pipeline {
deNovo.out.failed.map { meta, failed ->
if (failed == "1") {
log.warn "Flye failed for sample '$meta.alias' due to low coverage."
} else if (failed == "2"){
log.warn "Flye failed for sample '$meta.alias' as no disjointigs were assembled."
}
}

// Create channel of failed samples for checkpoints "not-met"
failed_samples = input_reads.no_reads.mix(
deNovo.out.failed | filter { meta, failed -> failed != "0"}
) | map { meta, field -> [ meta, "not-met" ] }
println(failed_samples.view())
named_refs = deNovo.out.asm.map { meta, asm, stats -> [meta, asm] }
// Nextflow might be run in strict mode (e.g. in CI) which prevents `join`
// from dropping non-matching entries. We have to use `remainder: true` and
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ manifest {
description = 'Assembly, variant calling, and annotation of bacterial genomes.'
mainScript = 'main.nf'
nextflowVersion = '>=23.04.2'
version = 'v1.0.2'
version = 'v1.1.0'
}

epi2melabs {
Expand Down

0 comments on commit 479d958

Please sign in to comment.