
Commit

scrubber runs with kraken2 pangenome and srahumanscrubber, amrfinder runs in combined mode now
rpetit3 committed Apr 4, 2024
1 parent 3514c94 commit a7e2c15
Showing 27 changed files with 149 additions and 298 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -8,12 +8,20 @@ description: A full list of Bactopia releases and a description of the changes.

### `Added`

- Named Workflows
  - `clean-yer-reads` - Use Bactopia's read QC steps to Clean-Yer-Reads
    - Use `clean-yer-reads`, `cyr`, or `bactopia --wf cleanyerreads`
  - `teton` - Host removal and taxon classification with estimated abundances
    - Use `teton` or `bactopia --wf teton`
- Bactopia Tools (`bactopia --wf <NAME>`)
  - `defensefinder` - Systematic search of all known anti-phage systems
  - `scrubber` - Remove human reads from FASTQ files
    - screens human reads with `kraken2` (_against the human pangenome_) or `sra-human-scrubber` (see the example invocations after this list)
- Full support for config files from nf-core/configs
  - no longer prints efficiency for the `standard`, `docker`, and `singularity` profiles
  - now requires non-integer values for `--max_time` (e.g. `4.h`) and `--max_memory` (e.g. `8.GB`)
  - always imports `base.config`
- `amrfinderplus` now runs a single combined search across genes, proteins, and the GFF (coordinates)
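
Example invocations (a sketch only; the database path and input placeholders below are illustrative, not prescriptive):

    # teton refuses to start without a Kraken2 database (see the new check in WorkflowBactopia.groovy below)
    bactopia --wf teton --kraken2_db /path/to/k2_human_pangenome <input options>
    # scrubber can screen with Kraken2 against the human pangenome instead of sra-human-scrubber
    bactopia --wf scrubber --use_k2scrubber --kraken2_db /path/to/k2_human_pangenome <input options>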

### `Fixed`

6 changes: 1 addition & 5 deletions conf/params/clean-yer-reads.config
@@ -14,14 +14,10 @@ params {
enable_fastq_check = false
skip_fastq_check = params.enable_fastq_check ? false : true

// Scrubber
enable_scrubber = false
skip_scrubber = params.enable_scrubber ? false : true

// QC Reads
enable_error_correction = false
skip_error_correction = params.enable_error_correction ? false : true

// Params to ignore
schema_ignore_params = "${params.schema_ignore_params},use_bakta,skip_scrubber,skip_fastq_check,skip_error_correction,ask_merlin,species,datasets,reassemble,assembly,short_polish,hybrid,available_datasets"
schema_ignore_params = "${params.schema_ignore_params},kraken2_db,use_bakta,skip_scrubber,skip_fastq_check,skip_error_correction,ask_merlin,species,datasets,reassemble,assembly,short_polish,hybrid,available_datasets"
}
6 changes: 3 additions & 3 deletions conf/workflows.config
@@ -64,7 +64,7 @@ params {
description = "Host removal and taxon classification with estimated abundances"
includes = ["gather", "scrubber", "bracken"]
is_workflow = true
modules = ["gather", "kraken2", "bracken"]
modules = ["gather", "kraken2", "bracken", "srahumanscrubber_scrub"]
}

'updater' {
@@ -128,7 +128,7 @@ params {
'scrubber' {
description = "Scrub human reads from FASTQ files"
ext = "fastq"
modules = ["kraken2"]
modules = ["kraken2", "srahumanscrubber_scrub"]
}
'snippy' {
description = "Rapid variant calling from Illumina sequence reads with optional core-SNP phylogeny"
@@ -179,7 +179,7 @@
}
'amrfinderplus' {
description = "Identify antimicrobial resistance in genes or proteins"
ext = "fna_faa"
ext = "fna_faa_gff"
path = "modules/nf-core/amrfinderplus/run"
}
'ariba_getref' {
2 changes: 1 addition & 1 deletion lib/NfcoreSchema.groovy
@@ -275,7 +275,7 @@ class NfcoreSchema {
}

if (param_required.length() > 0) {
required_output += "\n ### Workflow Specific\n"
required_output += '\n ' + colors.underlined + colors.bold + "Workflow Specific" + colors.reset + '\n'
required_output += param_required
}

28 changes: 10 additions & 18 deletions lib/WorkflowBactopia.groovy
@@ -131,29 +131,21 @@ class WorkflowBactopia {
error += 1
}
}
}

Removed (a commented-out check for an existing output directory):

        // Check for existing output directory
        /*
        if (Utils.isLocal(params.outdir)) {
            // Only run this if local files
            if (!workflow.resume) {
                def Integer files_found = 0
                new File("${params.outdir}/bactopia-comparative/${params.wf}/${params.run_name}").eachDirRecurse { item ->
                    if (item.toString().contains("nf-reports")) {
                        return
                    } else {
                        files_found += 1
                    }
                }
                if (files_found > 0 && !params.force) {
                    log.error("Output for ${params.run_name} (--run_name) already exists in ${params.outdir} (--outdir), ${params.wf} will not continue unless '--force' is used, a different run name (--run_name), or a different output directory (--outdir) is used.")
                    error += 1
                }
            }
        }
        */

Added (Kraken2 database validation for the Teton workflow):

        } else if (params.wf == "teton") {
            // Needed for Teton workflow
            if (params.kraken2_db) {
                if (Utils.isLocal(params.kraken2_db)) {
                    if (params.kraken2_db.endsWith(".tar.gz")) {
                        error += Utils.fileNotFound(params.kraken2_db, 'kraken2_db', log)
                    } else {
                        error += Utils.fileNotFound("${params.kraken2_db}/hash.k2d", 'kraken2_db', log)
                    }
                }
            } else {
                log.error "Teton workflow requires '--kraken2_db' to be provided"
                error += 1
            }
        }
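
To illustrate what the added check enforces, hypothetical `--kraken2_db` values (paths are examples only):

    // --kraken2_db /dbs/k2_human_pangenome         -> /dbs/k2_human_pangenome/hash.k2d must exist
    // --kraken2_db /dbs/k2_human_pangenome.tar.gz  -> the tarball path itself is checked
    // --kraken2_db s3://bucket/k2_db               -> not local, so Utils.isLocal() skips the file check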

if (error > 0) {
log.error("ERROR: Validation of pipeline parameters failed!\nPlease correct to continue")
15 changes: 15 additions & 0 deletions lib/nf/bactopia_tools.nf
@@ -170,6 +170,21 @@ def _collect_inputs(sample, dir, extension) {
return tuple([id:sample, single_end:false, is_compressed:false, runtype:'illumina'], [file("${fna}")], [file(pe1), file(pe2)])
}
}
} else if (extension == 'fna_faa_gff') {
// Default to Bakta faa
faa = "${base_dir}/${PATHS['faa']}/bakta/${sample}.faa"
gff = "${base_dir}/${PATHS['faa']}/bakta/${sample}.gff3"
if (!file("${faa}").exists() && !file("${faa}.gz").exists()) {
// Fall back on Prokka
faa = "${base_dir}/${PATHS['faa']}/prokka/${sample}.faa"
gff = "${base_dir}/${PATHS['faa']}/prokka/${sample}.gff"
}

if (file("${fna}.gz").exists() && file("${faa}.gz").exists() && file("${gff}.gz").exists()) {
return tuple([id:sample, is_compressed:true], [file("${fna}.gz")], [file("${faa}.gz")], [file("${gff}.gz")])
} else if (file(fna).exists() && file(faa).exists() && file(gff).exists()) {
return tuple([id:sample, is_compressed:false], [file("${fna}")], [file("${faa}")], [file("${gff}")])
}
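
For illustration, the tuple this new branch emits (hypothetical sample name, gzipped Bakta outputs assumed):

    // tuple([id:'SRX000001', is_compressed:true], [SRX000001.fna.gz], [SRX000001.faa.gz], [SRX000001.gff3.gz])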
} else if (extension == 'fna_faa') {
// Default to Bakta faa
faa = "${base_dir}/${PATHS['faa']}/bakta/${sample}.faa"
2 changes: 1 addition & 1 deletion lib/nf/functions.nf
@@ -98,7 +98,7 @@ def _get_module_schemas(modules) {
modules.each { it ->
if (params.wf == "cleanyerreads") {
module_schemas << "${params.workflows[it].path}/params-${params.wf}.json"
} else if (params.wf == "teton" && (it == "gather" || it == "srahumanscrubber_initdb" || it == "kraken2")) {
} else if (params.wf == "teton" && (it == "gather" || it == "srahumanscrubber_scrub" || it == "kraken2")) {
module_schemas << "${params.workflows[it].path}/params-${params.wf}.json"
} else {
module_schemas << "${params.workflows[it].path}/params.json"
3 changes: 2 additions & 1 deletion modules/local/bactopia/qc/main.nf
@@ -1,7 +1,7 @@
// Import generic module functions
include { initOptions; saveFiles } from '../../../../lib/nf/functions'
options = initOptions(params.options ? params.options : [:], 'qc')
options.ignore = ['.fna.gz']
options.ignore = ['.fna.gz', "EMPTY_EXTRA"]
options.btype = options.btype ?: "main"
conda_tools = "bioconda::bactopia-qc=1.0.2"
conda_name = conda_tools.replace("=", "-").replace(":", "-").replace(" ", "-")
@@ -23,6 +23,7 @@ process QC {

output:
tuple val(meta), path("results/${prefix}*.fastq.gz"), path("extra/*"), emit: fastq, optional: true
tuple val(meta), path("results/${prefix}*.fastq.gz") , emit: fastq_only, optional: true
path "results/*"
path "*.{log,err}" , emit: logs, optional: true
path ".command.*" , emit: nf_logs
50 changes: 27 additions & 23 deletions modules/nf-core/amrfinderplus/run/main.nf
@@ -16,25 +16,26 @@ process AMRFINDERPLUS_RUN {
'quay.io/biocontainers/ncbi-amrfinderplus:3.12.8--h283d18e_0' }"

input:
tuple val(meta), path(genes), path(proteins)
tuple val(meta), path(genes), path(proteins), path(gff)
each path(db)

output:
tuple val(meta), path("${prefix}-genes.tsv") , emit: gene_report
tuple val(meta), path("${prefix}-proteins.tsv") , emit: protein_report
tuple val(meta), path("${prefix}-{genes,proteins}-mutations.tsv"), emit: mutation_reports, optional: true
path "*.{log,err}" , emit: logs, optional: true
path ".command.*" , emit: nf_logs
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}.tsv") , emit: report
tuple val(meta), path("${prefix}-mutations.tsv"), emit: mutation_report, optional: true
path "*.{log,err}" , emit: logs, optional: true
path ".command.*" , emit: nf_logs
path "versions.yml" , emit: versions

script:
def fna_is_compressed = genes.getName().endsWith(".gz") ? true : false
def faa_is_compressed = proteins.getName().endsWith(".gz") ? true : false
def gff_is_compressed = gff.getName().endsWith(".gz") ? true : false
prefix = options.suffix ? "${options.suffix}" : "${meta.id}"
fna_organism_param = meta.containsKey("organism") ? "--organism ${meta.organism} --mutation_all ${prefix}-genes-mutations.tsv" : ""
faa_organism_param = meta.containsKey("organism") ? "--organism ${meta.organism} --mutation_all ${prefix}-proteins-mutations.tsv" : ""
organism_param = meta.containsKey("organism") ? "--organism ${meta.organism} --mutation_all ${prefix}-mutations.tsv" : ""
fna_name = genes.getName().replace(".gz", "")
faa_name = proteins.getName().replace(".gz", "")
gff_name = gff.getName().replace(".gz", "")
annotation_format = gff_name.endsWith(".gff") ? "prokka" : "bakta"
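// Prokka writes ".gff" while Bakta writes ".gff3", so the extension determines the --annotation_format passed to amrfinder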
"""
if [ "$fna_is_compressed" == "true" ]; then
gzip -c -d $genes > $fna_name
@@ -44,30 +45,33 @@ process AMRFINDERPLUS_RUN {
    gzip -c -d $proteins > $faa_name
fi

Removed (separate gene and protein searches):

tar xzvf $db

# Gene
amrfinder \\
    -n $fna_name \\
    $fna_organism_param \\
    $options.args \\
    --database amrfinderplus/ \\
    --threads $task.cpus \\
    --name $prefix > ${prefix}-genes.tsv

# Protein
amrfinder \\
    -p $faa_name \\
    $faa_organism_param \\
    $options.args \\
    --database amrfinderplus/ \\
    --threads $task.cpus \\
    --name $prefix > ${prefix}-proteins.tsv

Added (one combined search using genes, proteins, and GFF coordinates):

if [ "$gff_is_compressed" == "true" ]; then
    gzip -c -d $gff > $gff_name
fi

# Extract database
tar xzf $db

# Full AMRFinderPlus search combining results
amrfinder \\
    --nucleotide $fna_name \\
    --protein $faa_name \\
    --gff $gff_name \\
    --annotation_format $annotation_format \\
    $organism_param \\
    $options.args \\
    --database amrfinderplus/ \\
    --threads $task.cpus \\
    --name $prefix > ${prefix}.tsv

# Clean up
DB_VERSION=\$(echo \$(echo \$(amrfinder --database amrfinderplus --database_version 2> stdout) | rev | cut -f 1 -d ' ' | rev))
rm -rf amrfinderplus/

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    amrfinderplus: \$(amrfinder --version)
    amrfinderplus-database: \$DB_VERSION
END_VERSIONS
"""
}
2 changes: 1 addition & 1 deletion modules/nf-core/bakta/run/main.nf
@@ -23,7 +23,7 @@ process BAKTA_RUN {
path replicons

output:
tuple val(meta), path("results/${prefix}.{ffn,ffn.gz}"), path("results/${prefix}.{faa,faa.gz}"), emit: annotations
tuple val(meta), path("results/${prefix}.{fna,fna.gz}"), path("results/${prefix}.{faa,faa.gz}"), path("results/${prefix}.{gff3,gff3.gz}"), emit: annotations
tuple val(meta), path("results/${prefix}.{embl,embl.gz}") , emit: embl
tuple val(meta), path("results/${prefix}.{faa,faa.gz}") , emit: faa
tuple val(meta), path("results/${prefix}.{ffn,ffn.gz}") , emit: ffn
2 changes: 1 addition & 1 deletion modules/nf-core/bracken/main.nf
@@ -8,7 +8,7 @@ conda_env = file("${params.condadir}/${conda_name}").exists() ? "${params.co

process BRACKEN {
tag "$meta.id"
label 'process_high'
label 'process_medium'

conda (params.enable_conda ? conda_env : null)
container "${ workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container ?
11 changes: 9 additions & 2 deletions modules/nf-core/kraken2/main.nf
@@ -24,16 +24,20 @@ process KRAKEN2 {
tuple val(meta), path('*.scrub.report.tsv') , emit: scrub_report, optional: true
tuple val(meta), path("*.${classified_naming}*.fastq.gz") , emit: classified, optional: true
tuple val(meta), path("*.${unclassified_naming}*.fastq.gz"), emit: unclassified, optional: true
tuple val(meta), path("*.${classified_naming}*.fastq.gz") , path("EMPTY_EXTRA"), emit: classified_extra, optional: true
tuple val(meta), path("*.${unclassified_naming}*.fastq.gz"), path("EMPTY_EXTRA"), emit: unclassified_extra, optional: true
path "*.{log,err}" , emit: logs, optional: true
path ".command.*" , emit: nf_logs
path "versions.yml", emit: versions

script:
prefix = options.suffix ? "${options.suffix}" : "${meta.id}"
meta.single_end = reads[1] == null ? true : false
meta.is_paired = reads[1] == null ? false : true
def paired = meta.single_end ? "" : "--paired"
classified_naming = params.wf == "teton" || params.wf == "scrubber" ? "host" : "classified"
classified_naming = params.wf == "teton" || params.wf == "scrubber" || params.wf == "cleanyerreads" ? "host" : "classified"
classified = meta.single_end ? "${prefix}.${classified_naming}.fastq" : "${prefix}.${classified_naming}#.fastq"
unclassified_naming = params.wf == "teton" || params.wf == "scrubber" ? "scrubbed" : "unclassified"
unclassified_naming = params.wf == "teton" || params.wf == "scrubber" || params.wf == "cleanyerreads" ? "scrubbed" : "unclassified"
unclassified = meta.single_end ? "${prefix}.${unclassified_naming}.fastq" : "${prefix}.${unclassified_naming}#.fastq"
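// kraken2 expands the '#' in these templates to the read-pair index, e.g. prefix.scrubbed_1.fastq and prefix.scrubbed_2.fastq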
def is_tarball = db.getName().endsWith(".tar.gz") ? true : false
"""
@@ -81,6 +85,9 @@
rm ${prefix}.host*.fastq.gz
fi
# Used for clean-yer-reads
touch EMPTY_EXTRA
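# (empty placeholder so the *_extra channels match the (meta, fastq, extra) tuple shape emitted by the QC module)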
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastq-scan: \$(echo \$(fastq-scan -v 2>&1) | sed 's/fastq-scan //')
6 changes: 6 additions & 0 deletions modules/nf-core/kraken2/params-cleanyerreads.json
@@ -12,6 +12,12 @@
"default": "",
"fa_icon": "fas fa-exclamation-circle",
"properties": {
"use_k2scrubber": {
"type": "boolean",
"default": false,
"description": "Use Kraken2 with Human Pan-genome for scrubbing human reads",
"fa_icon": "fas fa-expand-arrows-alt"
},
"quick_mode": {
"type": "boolean",
"default": false,
1 change: 1 addition & 0 deletions modules/nf-core/kraken2/params.config
@@ -4,6 +4,7 @@ This file includes default parameter values.

params {
// Kraken2
use_k2scrubber = false
kraken2_db = null
quick_mode = false
confidence = 0.0
2 changes: 1 addition & 1 deletion modules/nf-core/prokka/main.nf
@@ -21,7 +21,7 @@ process PROKKA {
path prodigal_tf

output:
tuple val(meta), path("results/*.{ffn,ffn.gz}"), path("results/*.{faa,faa.gz}"), emit: annotations
tuple val(meta), path("results/*.{fna,fna.gz}"), path("results/*.{faa,faa.gz}"), path("results/*.{gff,gff.gz}"), emit: annotations
tuple val(meta), path("results/*.{gff,gff.gz}"), emit: gff
tuple val(meta), path("results/*.{gbk,gbk.gz}"), emit: gbk
tuple val(meta), path("results/*.{fna,fna.gz}"), emit: fna
40 changes: 0 additions & 40 deletions modules/nf-core/srahumanscrubber/initdb/params-cleanyerreads.json

This file was deleted.
