From 060cf507526714fd64ac53c467a6157fe47ae67e Mon Sep 17 00:00:00 2001 From: Sam Minot Date: Mon, 30 Oct 2023 09:29:08 -0700 Subject: [PATCH 1/3] Update eggnog-mapper version --- modules/annotation.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/annotation.nf b/modules/annotation.nf index f6e71c5..4e6a4f2 100644 --- a/modules/annotation.nf +++ b/modules/annotation.nf @@ -175,7 +175,7 @@ done > genes.tax.aln.gz process eggnog { tag "Annotate genes by predicted function" - container "quay.io/biocontainers/eggnog-mapper:2.0.1--py_1" + container "quay.io/biocontainers/eggnog-mapper:2.1.12--pyhdfd78af_0" label 'mem_veryhigh' input: @@ -199,6 +199,7 @@ mv ${eggnog_dmnd} data/eggnog_proteins.dmnd emapper.py \ -i ${query} \ + --itype proteins \ --output genes \ -m "diamond" \ --cpu ${task.cpus} \ From f350f58f6522cb40fc38a2bca9e729992cd437d6 Mon Sep 17 00:00:00 2001 From: Sam Minot Date: Mon, 30 Oct 2023 09:39:46 -0700 Subject: [PATCH 2/3] Document database setup --- modules/annotation.nf | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/modules/annotation.nf b/modules/annotation.nf index 4e6a4f2..f8dba7a 100644 --- a/modules/annotation.nf +++ b/modules/annotation.nf @@ -226,13 +226,38 @@ def helpMessage() { --gene_fasta Location for input 'genes.fasta.gz' --output_folder Location for output --taxonomic_dmnd Database used for taxonomic annotation (default: false) - (Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-01-15-geneshot/DB.refseq.tax.dmnd) --ncbi_taxdump Reference describing the NCBI Taxonomy (default: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz) --eggnog_dmnd One of two databases used for functional annotation with eggNOG (default: false) - (Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-06-17-eggNOG-v5.0/eggnog_proteins.dmnd) --eggnog_db One of two databases used for functional annotation with eggNOG 
(default: false) - (Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-06-17-eggNOG-v5.0/eggnog.db) + + + #################################### + # Downloading Reference Databases: # + #################################### + + --taxonomic_dmnd + The DIAMOND database of reference protein sequences must be indexed using both + (a) a set of sequences to search and (b) taxonomic annotations for each. + Full instructions for creating this indexed database file can be found + here: https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options + + Example: + diamond makedb \ + --in \ + --db \ + --taxonmap prot.accession2taxid.FULL.gz \ + --taxonnodes nodes.dmp \ + --taxonnames names.dmp + + --eggnog_dmnd & --eggnog_db + The eggNOG database for functional annotation can be most easily downloaded + using the dedicated utility provided along with the eggNOG-mapper utility. + The only flag which needs to be set when running the download utility is the + destination folder for the downloaded files. 
+ + Example: + download_eggnog_data.py --data_dir data/ """.stripIndent() } From c6763f4be9f61e85509aae6e7493c991719c8f1d Mon Sep 17 00:00:00 2001 From: Sam Minot Date: Mon, 30 Oct 2023 09:46:05 -0700 Subject: [PATCH 3/3] Clean up functional annotation workflow --- modules/annotation.nf | 69 ++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/modules/annotation.nf b/modules/annotation.nf index f8dba7a..890e773 100644 --- a/modules/annotation.nf +++ b/modules/annotation.nf @@ -61,9 +61,29 @@ workflow Annotation_wf { run_eggnog = false if ( params.noannot == false ) { if ( params.eggnog_db && params.eggnog_dmnd ) { - if ( !file(params.eggnog_db).isEmpty() && !file(params.eggnog_dmnd).isEmpty() ){ + eggnog_db = file(params.eggnog_db) + eggnog_dmnd = file(params.eggnog_dmnd) + + if ( eggnog_db.isEmpty() ){ + log.info"Cannot find file ${params.eggnog_db}, skipping functional annotation" + } + if ( eggnog_dmnd.isEmpty() ){ + log.info"Cannot find file ${params.eggnog_dmnd}, skipping functional annotation" + } + + if ( !eggnog_db.isEmpty() && !eggnog_dmnd.isEmpty() ){ run_eggnog = true } + + } else { + + if ( params.eggnog_db ) { + log.info"Missing --eggnog_dmnd, skipping functional annotation" + } + if ( params.eggnog_dmnd ) { + log.info"Missing --eggnog_db, skipping functional annotation" + } + } } @@ -71,8 +91,8 @@ workflow Annotation_wf { if ( run_eggnog ){ eggnog( shard_genes.out.flatten(), - file(params.eggnog_db), - file(params.eggnog_dmnd) + eggnog_db, + eggnog_dmnd ) eggnog_tsv = eggnog.out.collect() } else { @@ -84,8 +104,11 @@ workflow Annotation_wf { run_tax = false if ( params.noannot == false ) { if ( params.taxonomic_dmnd ) { - if ( !file(params.taxonomic_dmnd).isEmpty() ){ + taxonomic_dmnd = file(params.taxonomic_dmnd) + if ( !taxonomic_dmnd.isEmpty() ){ run_tax = true + } else { + log.info"Cannot find ${params.taxonomic_dmnd}, skipping taxonomic annotation" } } } @@ -94,7 +117,7 @@ workflow Annotation_wf { if 
( run_tax ) { diamond_tax( shard_genes.out.flatten(), - file(params.taxonomic_dmnd) + taxonomic_dmnd ) tax_tsv = diamond_tax.out.collect() join_tax( @@ -262,27 +285,27 @@ def helpMessage() { """.stripIndent() } - -// Show help message if the user specifies the --help flag at runtime -if (params.help || params.gene_fasta == false || params.output_folder == false){ - // Invoke the function above which prints the help message - helpMessage() - // Exit out and do not run anything else - exit 0 -} - -// Show help message if the user does not specify any annotations -if (params.taxonomic_dmnd == false && params.eggnog_dmnd == false && params.eggnog_db == false){ - // Invoke the function above which prints the help message - helpMessage() - // Exit out and do not run anything else - exit 0 -} - workflow { main: + + // Show help message if the user specifies the --help flag at runtime + if (params.help || params.gene_fasta == false || params.output_folder == false){ + // Invoke the function above which prints the help message + helpMessage() + // Exit out and do not run anything else + exit 0 + } + + // Show help message if the user does not specify any annotations + if (params.taxonomic_dmnd == false && params.eggnog_dmnd == false && params.eggnog_db == false){ + // Invoke the function above which prints the help message + helpMessage() + // Exit out and do not run anything else + exit 0 + } + // Make sure we can find the input files if(file(params.gene_fasta).isEmpty()){ log.info"""Cannot find input file ${params.gene_fasta}""".stripIndent() @@ -294,6 +317,4 @@ workflow { file(params.gene_fasta) ) - - } \ No newline at end of file