Skip to content

Commit

Permalink
Merge pull request #90 from Golob-Minot/anndata_eggnog
Browse files Browse the repository at this point in the history
Update annotation workflow
  • Loading branch information
jgolob committed Nov 3, 2023
2 parents 0727492 + c6763f4 commit a85f336
Showing 1 changed file with 75 additions and 28 deletions.
103 changes: 75 additions & 28 deletions modules/annotation.nf
Expand Up @@ -61,18 +61,38 @@ workflow Annotation_wf {
run_eggnog = false
if ( params.noannot == false ) {
if ( params.eggnog_db && params.eggnog_dmnd ) {
if ( !file(params.eggnog_db).isEmpty() && !file(params.eggnog_dmnd).isEmpty() ){
eggnog_db = file(params.eggnog_db)
eggnog_dmnd = file(params.eggnog_dmnd)

if ( eggnog_db.isEmpty() ){
log.info"Cannot find file ${params.eggnog_db}, skipping functional annotation"
}
if ( eggnog_dmnd.isEmpty() ){
log.info"Cannot find file ${params.eggnog_dmnd}, skipping functional annotation"
}

if ( !eggnog_db.isEmpty() && !eggnog_dmnd.isEmpty() ){
run_eggnog = true
}

} else {

if ( params.eggnog_db ) {
log.info"Missing --eggnog_dmnd, skipping functional annotation"
}
if ( params.eggnog_dmnd ) {
log.info"Missing --eggnog_db, skipping functional annotation"
}

}
}

// Annotate the clustered genes with eggNOG
if ( run_eggnog ){
eggnog(
shard_genes.out.flatten(),
file(params.eggnog_db),
file(params.eggnog_dmnd)
eggnog_db,
eggnog_dmnd
)
eggnog_tsv = eggnog.out.collect()
} else {
Expand All @@ -84,8 +104,11 @@ workflow Annotation_wf {
run_tax = false
if ( params.noannot == false ) {
if ( params.taxonomic_dmnd ) {
if ( !file(params.taxonomic_dmnd).isEmpty() ){
taxonomic_dmnd = file(params.taxonomic_dmnd)
if ( !taxonomic_dmnd.isEmpty() ){
run_tax = true
} else {
log.info"Cannot find ${params.taxonomic_dmnd}, skipping taxonomic annotation"
}
}
}
Expand All @@ -94,7 +117,7 @@ workflow Annotation_wf {
if ( run_tax ) {
diamond_tax(
shard_genes.out.flatten(),
file(params.taxonomic_dmnd)
taxonomic_dmnd
)
tax_tsv = diamond_tax.out.collect()
join_tax(
Expand Down Expand Up @@ -175,7 +198,7 @@ done > genes.tax.aln.gz

process eggnog {
tag "Annotate genes by predicted function"
container "quay.io/biocontainers/eggnog-mapper:2.0.1--py_1"
container "quay.io/biocontainers/eggnog-mapper:2.1.12--pyhdfd78af_0"
label 'mem_veryhigh'

input:
Expand All @@ -199,6 +222,7 @@ mv ${eggnog_dmnd} data/eggnog_proteins.dmnd
emapper.py \
-i ${query} \
--itype proteins \
--output genes \
-m "diamond" \
--cpu ${task.cpus} \
Expand All @@ -225,38 +249,63 @@ def helpMessage() {
--gene_fasta Location for input 'genes.fasta.gz'
--output_folder Location for output
--taxonomic_dmnd Database used for taxonomic annotation (default: false)
(Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-01-15-geneshot/DB.refseq.tax.dmnd)
--ncbi_taxdump Reference describing the NCBI Taxonomy
(default: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz)
--eggnog_dmnd One of two databases used for functional annotation with eggNOG (default: false)
(Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-06-17-eggNOG-v5.0/eggnog_proteins.dmnd)
--eggnog_db One of two databases used for functional annotation with eggNOG (default: false)
(Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-06-17-eggNOG-v5.0/eggnog.db)
""".stripIndent()
}

// Show help message if the user specifies the --help flag at runtime
if (params.help || params.gene_fasta == false || params.output_folder == false){
// Invoke the function above which prints the help message
helpMessage()
// Exit out and do not run anything else
exit 0
}
// Show help message if the user does not specify any annotations
if (params.taxonomic_dmnd == false && params.eggnog_dmnd == false && params.eggnog_db == false){
// Invoke the function above which prints the help message
helpMessage()
// Exit out and do not run anything else
exit 0
####################################
# Downloading Reference Databases: #
####################################
--taxonomic_dmnd
The DIAMOND database of reference protein sequences must be indexed using both
(a) a set of sequences to search and (b) taxonomic annotations for each.
Full instructions for creating this indexed database file can be found
here: https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options
Example:
diamond makedb \
--in <proteins_fasta> \
--db <output_dmnd> \
--taxonmap prot.accession2taxid.FULL.gz \
--taxonnodes nodes.dmp \
--taxonnames names.dmp
--eggnog_dmnd & --eggnog_db
The eggNOG database for functional annotation can be most easily downloaded
using the dedicated utility provided along with the eggNOG-mapper utility.
The only flag which needs to be set when running the download utility is the
destination folder for the downloaded files.
Example:
download_eggnog_data.py --data_dir data/
""".stripIndent()
}

workflow {

main:


// Show help message if the user specifies the --help flag at runtime
if (params.help || params.gene_fasta == false || params.output_folder == false){
// Invoke the function above which prints the help message
helpMessage()
// Exit out and do not run anything else
exit 0
}

// Show help message if the user does not specify any annotations
if (params.taxonomic_dmnd == false && params.eggnog_dmnd == false && params.eggnog_db == false){
// Invoke the function above which prints the help message
helpMessage()
// Exit out and do not run anything else
exit 0
}

// Make sure we can find the input files
if(file(params.gene_fasta).isEmpty()){
log.info"""Cannot find input file ${params.gene_fasta}""".stripIndent()
Expand All @@ -268,6 +317,4 @@ workflow {
file(params.gene_fasta)
)



}

0 comments on commit a85f336

Please sign in to comment.