Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add module seqfu/stats #5275

Merged
merged 10 commits into from Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/seqfu/stats/environment.yml
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the seqfu/stats module.
name: seqfu_stats
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # seqfu is pinned to an exact version and taken from the bioconda channel.
  - bioconda::seqfu=1.20.3
51 changes: 51 additions & 0 deletions modules/nf-core/seqfu/stats/main.nf
@@ -0,0 +1,51 @@
/*
 * SEQFU_STATS: run `seqfu stats` on one or more FASTA/FASTQ files.
 *
 * Input:
 *   tuple val(meta), path(files)
 *     meta  - Groovy map with sample information; meta.id is used as the task
 *             tag and as the default output prefix
 *     files - one or more FASTA or FASTQ files, passed to seqfu in one call
 *
 * Output:
 *   stats    - tuple (meta, <prefix>.tsv): the table seqfu writes to stdout
 *   multiqc  - tuple (meta, <prefix>_mqc.txt): the file written via --multiqc
 *   versions - versions.yml recording the seqfu version
 *
 * Options read from task.ext:
 *   args   - extra command-line arguments appended to `seqfu stats`
 *   prefix - output file prefix (defaults to meta.id)
 *   when   - optional condition controlling whether the task runs
 */
process SEQFU_STATS {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqfu:1.20.3--h1eb128b_0':
        'biocontainers/seqfu:1.20.3--h1eb128b_0' }"

    input:
    // stats can get one or more fasta or fastq files
    tuple val(meta), path(files)

    output:
    tuple val(meta), path("*.tsv")    , emit: stats
    tuple val(meta), path("*_mqc.txt"), emit: multiqc
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The statistics table goes to stdout and is redirected to the TSV file;
    // the MultiQC table is written by seqfu itself to the --multiqc path.
    """
    seqfu \\
        stats \\
        $args \\
        --multiqc ${prefix}_mqc.txt \\
        $files > ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqfu: \$(seqfu version)
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The stub only creates empty placeholders for the declared outputs; it
    // does not run the analysis. The version is reported with the same
    // command as the script section so versions.yml names the right tool.
    """
    touch ${prefix}.tsv
    touch ${prefix}_mqc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqfu: \$(seqfu version)
    END_VERSIONS
    """
}
57 changes: 57 additions & 0 deletions modules/nf-core/seqfu/stats/meta.yml
@@ -0,0 +1,57 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for seqfu/stats: describes the tool and the module's
# input and output channels.
name: "seqfu_stats"
description: Statistics for FASTA or FASTQ files
keywords:
  - seqfu
  - stats
  - n50
tools:
  - "seqfu":
      description: "Cross-platform compiled suite of tools to manipulate and inspect FASTA and FASTQ files"
      homepage: "https://telatin.github.io/seqfu2/"
      documentation: "https://telatin.github.io/seqfu2/"
      tool_dev_url: "https://github.com/telatin/seqfu2"
      doi: "10.3390/bioengineering8050059"
      licence: ["GPL v3"]

input:
  # Sample metadata map paired with the input files
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`

  - files:
      type: file
      description: One or more FASTA or FASTQ files
      pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fq,fq.gz}"

output:
  # Sample metadata map carried through to the outputs
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - stats:
      type: file
      description: Tab-separated output file with basic sequence statistics.
      pattern: "*.tsv"

  # The file name ends in "_mqc.txt" (no dot before "_mqc"), so the glob
  # must not place a literal dot in front of the suffix.
  - multiqc:
      type: file
      description: MultiQC ready table
      pattern: "*_mqc.txt"

authors:
  - "@telatin"
maintainers:
  - "@telatin"
75 changes: 75 additions & 0 deletions modules/nf-core/seqfu/stats/tests/main.nf.test
@@ -0,0 +1,75 @@
// nf-test specification for the SEQFU_STATS process defined in ../main.nf.
nextflow_process {

    name "Test Process SEQFU_STATS"
    script "../main.nf"
    process "SEQFU_STATS"

    tag "modules"
    tag "modules_nfcore"
    tag "seqfu"
    tag "seqfu/stats"

    // Case 1: a single FAA file holding several sequences of different lengths.
    test("seqfu stats - faa") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                // The task must succeed and emit exactly one stats tuple plus a MultiQC table.
                { assert process.success },
                { assert process.out.stats },
                { assert process.out.multiqc },
                { assert process.out.stats.size() == 1 },
                // Snapshot the versions file and the stats channel, and pin the TSV checksum.
                { assert snapshot(process.out.versions).match("versions-single") },
                { assert snapshot(process.out.stats).match("stats-single") },
                { assert path(process.out.stats[0][1]).md5 == "26141ef87ad8a6f59a6f283cc0a06fda" }
            )
        }

    }

    // Case 2: several inputs at once, mixing plain and gzip-compressed files.
    test("seqfu stats - multiple files") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    [
                        file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true),
                        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
                        file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true),
                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
                    ]
                ]
                """
            }
        }

        then {
            assertAll(
                // All inputs are summarised into a single stats tuple.
                { assert process.success },
                { assert process.out.stats },
                { assert process.out.multiqc },
                { assert process.out.stats.size() == 1 },
                // The second line of versions.yml must contain a dot.
                { assert path(process.out.versions[0]).readLines()[1].contains('.') },
                { assert snapshot(process.out.stats).match("stats-multi") },
                { assert snapshot(process.out.stats).md5().match("multi-lines") },
                // The first line of the TSV is the column header.
                { assert path(process.out.stats[0][1]).readLines()[0] == 'File\t#Seq\tTotal bp\tAvg\tN50\tN75\tN90\tauN\tMin\tMax' },
                // The MultiQC table must mention both FASTA inputs by file name.
                { assert path(process.out.multiqc[0][1]).readLines().any { row -> row.contains('genome.fasta') } },
                { assert path(process.out.multiqc[0][1]).readLines().any { row -> row.contains('proteome.fasta') } }
            )
        }

    }

}
56 changes: 56 additions & 0 deletions modules/nf-core/seqfu/stats/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions modules/nf-core/seqfu/stats/tests/tags.yml
@@ -0,0 +1,2 @@
# Maps the seqfu/stats tag to the file paths that belong to this module.
seqfu/stats:
  - 'modules/nf-core/seqfu/stats/**'