diff --git a/bio/emu/abundance/environment.linux-64.pin.txt b/bio/emu/abundance/environment.linux-64.pin.txt new file mode 100644 index 0000000000..a22b63bcaf --- /dev/null +++ b/bio/emu/abundance/environment.linux-64.pin.txt @@ -0,0 +1,62 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_5.conda#f6f6600d18a4047b54f803cf708b868a +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.10-4_cp310.conda#26322ec5d7712c3ded99dd656142b8ce +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda#d211c42b9ce49aee3734fdc828731689 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda#d4ff227c46917d3b4565302a2bbb276b +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.28.1-hd590300_0.conda#dcde58ff9a1f30b0037a2315d1846d1f +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-ha4646dd_5.conda#7a6bd7a12a4bd359e2afe6c0fa1acace +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a +https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.2-h2797004_0.conda#866983a220e27a80cb75e85cb30466a1 +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/bioconda/linux-64/bioawk-1.0-he4a0461_10.tar.bz2#3f4ea155f59ae781753ea76571e8564a +https://conda.anaconda.org/bioconda/linux-64/k8-0.2.5-hdcf5f25_4.tar.bz2#d3c49a96ae45864706037702775ca7c2 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 +https://conda.anaconda.org/conda-forge/linux-64/python-3.10.14-hd12c33a_0_cpython.conda#2b4ba962994e8bd4be9ff5b64b75aff2 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.7.1-hca28451_0.conda#755c7f876815003337d2c61ff5d047e5 +https://conda.anaconda.org/bioconda/linux-64/minimap2-2.28-he4a0461_0.tar.bz2#27d83cfe6bca3eb50aaeb6334371122d +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.2.0-pyhd8ed1ab_0.conda#da214ecd521a720a9d521c68047682dc +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 
+https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/pathlib2-2.3.7.post1-py310hff52083_3.conda#62d26790749f62b9329425c901d93c3a +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/bioconda/linux-64/pysam-0.22.0-py310h41dec4a_1.tar.bz2#19fdb9301a6debbb7fe9836670e3feb7 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/noarch/flatten-dict-0.4.2-pyhd8ed1ab_1.tar.bz2#ccfb30b92adfeb283d4dcae3d0b6441b +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py310hb13e2d6_0.conda#6593de64c935768b6bad3e19b3e978be +https://conda.anaconda.org/conda-forge/linux-64/biopython-1.83-py310h2372a71_0.conda#0128595946cebfaaf212cc45d4b9cd3c +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.1-py310hcc13569_0.conda#cf5d315e3601a6a2931f63aa9a84dc40 +https://conda.anaconda.org/bioconda/noarch/emu-3.4.5-hdfd78af_0.tar.bz2#34b067c6f82c3796a40e1b0ecaf094d3 diff --git a/bio/emu/abundance/environment.yaml b/bio/emu/abundance/environment.yaml new file mode 100644 index 0000000000..a117676039 --- /dev/null +++ b/bio/emu/abundance/environment.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - emu=3.4.5 \ No newline at end of file diff --git a/bio/emu/abundance/meta.yaml b/bio/emu/abundance/meta.yaml new file mode 100644 index 0000000000..9106bd2c70 --- /dev/null +++ b/bio/emu/abundance/meta.yaml @@ -0,0 +1,14 @@ +name: emu abundance +description: Generate relative abundance estimates from ONT, Pac-Bio or short 16S reads using emu. 
+url: https://github.com/treangenlab/emu +authors: + - Curro Campuzano +input: + - reads: single fastq file or paired fastq files + - db: emu database (optional; check documentation for pre-built databases and how to build them). +output: + - abundances: TSV with relative (and, optionally, absolute) abundances. + - alignments: SAM file with the alignments (optional). + - unclassified: FASTA file with unclassified sequences (optional). +params: + extra: Any optional parameter such as --type (sequencer) or --min-abundance. Optional flags involving output are handled automatically (e.g. --output-dir, --output-basename ...) diff --git a/bio/emu/abundance/test/Snakefile b/bio/emu/abundance/test/Snakefile new file mode 100644 index 0000000000..466de94623 --- /dev/null +++ b/bio/emu/abundance/test/Snakefile @@ -0,0 +1,30 @@ +rule abundance: + input: + reads="{sample}.fa", + db="database", + output: + abundances="{sample}_rel-abundance.tsv", + alignments="{sample}_emu_alignments.sam", + unclassified="{sample}_unclassified.fa", + log: + "logs/emu/{sample}_abundance.log", + params: + extra="--type map-ont --keep-counts", + threads: 3 # optional, defaults to 1 + wrapper: + "master/bio/emu/abundance" + + +rule abundance_paired: + input: + reads=["{sample}_R1.fq", "{sample}_R2.fq"], + db="database", + output: + abundances="{sample}_rel-abundance_paired.tsv", + log: + "logs/emu/{sample}_abundance_paired.log", + params: + extra="--type sr --keep-counts", + threads: 3 # optional, defaults to 1 + wrapper: + "master/bio/emu/abundance" diff --git a/bio/emu/abundance/test/database/species_taxid.fasta b/bio/emu/abundance/test/database/species_taxid.fasta new file mode 100644 index 0000000000..4e0e691a19 --- /dev/null +++ b/bio/emu/abundance/test/database/species_taxid.fasta @@ -0,0 +1,15 @@ +>1:emu-silva:1 ['dada2-silva_1 Bacteria;Proteobacteria;Gammaproteobacteria;Pseudomonadales;Pseudomonadaceae;Pseudomonas;amygdali;'] +AACTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAG 
+TCGAGCGGCAGCACGGGTACTTGTACCTGGTGGCGAGCGGCGGACGGGTGAGTAATGCCT +AGGAATCTGCCTGGTAGTGGGGGATAACGCTCGGAAACGGACGCTAATACCGCATACGTC +CTACGGGAGAAAGCAGGGGACCTTCGGGCCTTGCGCTATCAGATGAGCCTAGGTCGGATT +AGCTAGTTGGTGAGGTAATGGCTCACCAAGGCGACGATCCGTAACTGGTCTGAGAGGATG +ATCAGTCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAAT +ATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGA +TTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTTACCTAATACGTATCTGTTTTGACGT +TACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGC +GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAA +GCGCACNNNNGCGGTCTGTCAAGTCGGANGNNAAATCCCCGGGCTNNNNNNNGGAACTGC +ATTCGAAACTGNCAGGCTTGAGTCTTGTAGAGGGNNNTNGNATTCNNNGTGTAGCGNNNN +NNTGCGTAGAGATCTGGANGAACACCAGTGGCGAAGGCGGCTCTCTNGTCTGTAACTGAC +GCTGAGGCTCGAAAGCNTGGGGAGCAAACAGGATTAGATANCCTGGTAGTCCACG \ No newline at end of file diff --git a/bio/emu/abundance/test/database/taxonomy.tsv b/bio/emu/abundance/test/database/taxonomy.tsv new file mode 100644 index 0000000000..c6a3209d65 --- /dev/null +++ b/bio/emu/abundance/test/database/taxonomy.tsv @@ -0,0 +1,2 @@ +tax_id superkingdom phylum class order family genus species +1 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas amygdali; \ No newline at end of file diff --git a/bio/emu/abundance/test/sample.fa b/bio/emu/abundance/test/sample.fa new file mode 100644 index 0000000000..985df2df9c --- /dev/null +++ b/bio/emu/abundance/test/sample.fa @@ -0,0 +1,70 @@ +>Sphingobacterium_puteal_r1 +ACGGGTGCGTAACGCGTGAGCAACCTACCTCTATCAGGGGGATAGCCTCTCGAAAGAGAGATTAACACCGCATAACA +TCAACAGTTCGCATGTTCGGTTGATTAAATATTTATAGGATAGAGATGGGCTCGCGTGACATTAGCTAGTTGGTAGGGTA +ACGGCCTACCAAGGCGACGATGTCTAGGGGCTCTGAGAGGAGAATCCCCCACACTGGTACTGAGACACGGACCAGACTCC +TACGGGAGGCAGCAGTAAGGAATATTGGTCAATGGGCGGAAGCCTGAACCAGCCATGCCGCGTGCAGGAAGACTGCCCTA +TGGGTTGTAAACTGCTTTTGTCCAGGAATAAACCTCTTTACGTGTAGAGAGCTGAATGTACTGGAAGAATAAGGATCGGC +TAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCG 
+GCCTGTTAAGTCAGGGGTGAAATACGGTGGCTCAACCATCGCAGTGCCTTTGATACTGACGGGCTTGAATCCATTTGAAG +TGGGCGGAATAAGACAAGTAGCGGTGAAATGCATAGATATGTCTTAGAACTCCGATTGCGAAGGCAGCTCACTAAGCTGG +TATTGACGCTGATGCACGAAAGCGTGGGGATCGAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGATAACT +CGATGTTGGCGATAGACCGCCAGCGTCCAAGCGAAAGCGTTAAGTTATCCACCTGGGGAGTACGCCCGCAAGGGTGAAAC +TCAAAGGAATTGACGGGGGCCCGCACAAGCGGAGGAGCATGTGGTTTAATTCGATGATACGCGAGGAACCTTACCCGGGC +TTGAAAGTTAGTGAAGGATGCGGAGACGCATCCGTCCTTCGGGACACGAAACTAGGTGCTGCATGGCTGTCGTCAGCTCG +TGCCGTGAGGTGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTATGTTTAGTTGCCAGCAAGTAATGTTGGGGACTCTA +AACAGACTGCCTGCGCAAGCAGAGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGTCCGGGGCTACACACG +TGCTACAATGGATGGTACAGCGGGCAGCTACATAGCAATATGGTGCTAATCTCTAAAAGCCATTCACAGTTCGGATTGGG +GTCTGCAACTCGACCCCATGAAGTTGGATTCGCTAGTAATCGCGTATCAGC +>Sphingobacterium_puteal_r2 +GGCCTAATACATGCAAGTCGGACGGGATTTAAGTTAAAGCTTGCTTTAAGTTAATGAGAGTGG +CGCACGGGTGCGTAACGCGTGAGCAACCTACCTCTATCAGGGGGATAGCCTCTCGAAAGAGAGATTAACACCGCATAACA +TCAACAGTTCGCATGTTCGGTTGATTAAATATTTATAGGATAGAGATGGGCTCGCGTGACATTAGCTAGTTGGTAGGGTA +ACGGCCTACCAAGGCGACGATGTCTAGGGGCTCTGAGAGGAGAATCCCCCACACTGGTACTGAGACACGGACCAGACTCC +TACGGGAGGCAGCAGTAAGGAATATTGGTCAATGGGCGGAAGCCTGAACCAGCCATGCCGCGTGCAGGAAGACTGCCCTA +TGGGTTGTAAACTGCTTTTGTCCAGGAATAAACCTCTTTACGTGTAGAGAGCTGAATGTACTGGAAGAATAAGGATCGGC +TAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCG +GCCTGTTAAGTCAGGGGTGAAATACGGTGGCTCAACCATCGCAGTGCCTTTGATACTGACGGGCTTGAATCCATTTGAAG +TGGGCGGAATAAGACAAGTAGCGGTGAAATGCATAGATATGTCTTAGAACTCCGATTGCGAAGGCAGCTCACTAAGCTGG +TATTGACGCTGATGCACGAAAGCGTGGGGATCGAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGATAACT +CGATGTTGGCGATAGACCGCCAGCGTCCAAGCGAAAGCGTTAAGTTATCCACCTGGGGAGTACGCCCGCAAGGGTGAAAC +TCAAAGGAATTGACGGGGGCCCGCACAAGCGGAGGAGCATGTGGTTTAATTCGATGATACGCGAGGAACCTTACCCGGGC +TTGAAAGTTAGTGAAGGATGCGGAGACGCATCCGTCCTTCGGGACACGAAACTAGGTGCTGCATGGCTGTCGTCAGCTCG +TGCCGTGAGGTGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTATGTTTAGTTGCCAGCAAGTAATGTTGGGGACTCTA 
+AACAGACTGCCTGCGCAAGCAGAGAGGAAGGTGGGGACGACGTCAAGTCATCATGGCCCTTACGTCCGGGGCTACACACG +TGCTACAATGGATGGTACAGCGGGCAGCTACATAGCAATATGGTGCTAATCTCTAAAAGCCATTCACAGTTCGGATTGGG +GTCTGCAACTCGACCCCATGAAGTTGGATTCGCTAGTAATCGCGTATCAGCAATGACGCGGTGAATACGTTCCCGGGCCT +TGTACACA +>Mycobacterium_saskatchewanense_r1 +AGTTTGATCCTGGCTCAGGACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGAAAGGTCTCTTCGGAGATAC +TCGAGTGGCGAACGGGTGAGTAACACGTGGGCAATCTGCCCTGCACTTCGGGATAAGCCTGGGAAACTGGGTCTAATACC +GGATAGGACCTTTAGGCGCATGCCTTTTGGTGGAAAGCTTTTGCGGTGTGGGATGGGCCCGCGGCCTATCAGCTTGTTGG +TGGGGTGATGGCCTACCAAGGCGACGACGGGTAGCCGGCCTGAGAGGGTGTCCGGCCACACTGGGACTGAGATACGGCCC +AGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGGGGGATGAC +GGCCTTCGGGTTGTAAACCTCTTTCAGCAGGGACGAAGCGCAAGTGACGGTACCTGCAGAAGAAGCACCGGCCAACTACG +TGCCAGCAGCCGCGGTAATACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGAGCTCGTAGGTGGTTTGTCG +CGTTGTTCGTGAAATCTCACGGCTTAACTGTGAGCGTGCGGGCGATACGGGCAGACTAGAGTACTGCAGGGGAGACTGGA +ATTCCTGGTGTAGCGGTGGAATGCGCAGATATCAGGAGGAACACCGGTGGCGAAGGCGGGTCTCTGGGCAGTAACTGACG +CTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGGTGGGTACTAGGTGTGG +GTTTCCTTCCTTGGGATCCGTGCCGTAGCTAACGCATTAAGTACCCCGCCTGGGGAGTACGGCCGCAAGGCTAAAACTCA +AAGGAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGTGGATTAATTCGATGCAACGCGAAGAACCTTACCTGGGTTTG +ACATGCACAGGACGCCGGCAGAGATGTCGGTTCCCTTGTGGCCTGTGTGCAGGTGGTGCATGGCTGTCGTCAGCTCGTGT +CGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCTCATGTTGCCAGCGGGTAATGCCGGGGACTCGTGAG +AGACTGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAAGTCATCATG +>Streptococcus_sobrinus_r1 +AGTGTTACTAATGAGTCGCGAACGGGTGAGTAACGCGTAGGTAACCTGCCTGATAGCGGGGGATAACTATTGGAAACGAT +AGCTAATACCGCATAAGAGGAGTTAACTCATGTTAACTGTTTAAAAGAAGCCATTGCTTCACTATCAGAGGACCTGCGT +TGTATTAGCTAGTAGGTAGGGTAACGGCCTACCTAGGCAACGATACATAGCCGACCTGAGAGGGTGAACGGCCACACTGG +GACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCGGCAATGGACGCAAGTCTGACCGAGCAACG +CCGCGTGAGTGAAGACGGTTTTCGGATCGTAAAGCTCTGTTGTAGGGGAAGAACGTGTGTAAGAGTGGAAAGCTTACACA 
+GTGACGGTACCCTACCAGAAAGGGACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGG +ATTTATTGGGCGTAAAGGGAGCGCAGGCGGTTTAGTAAGTCTGAAGTTAAAGGCATTGGCTCAACCAATGTATGCTTTGG +AAACTGTTAGACTTGAGTGCAGAAGGGGAGAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACAC +CGGTGGCGAAAGCGGCTCTCTGGTCTGTCACTGACGCTGAGGCTCGAAAGCGTGGGTAGCGAACAGGATTAGATACCCTG +GTAGTCCACGCCGTAAACGCTGAGTGCTAGGTGTTAGGTCCTTTCCGGGACTTAGTGCCGACGCTAACGCATTAAGCACT +CCGCCTGGGGAGTACGACCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTA +ATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCGATGCCCGCTCTAGAGATAGAGTTTTTCTTCGGAACAT +CGGAGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTAT +TGTTAGTTGCCATCATTAAGTTGGGCACTCTAGCGAGACTGCCGGTAATAAACCGGAGGAAGGTGGGGATGACGTCAAAT +CATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGTTGGTACAACGAGTCGCAAGCCGGTGACGGCAAGCTA +ATCTCTGAAAGCCAATCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGTCGGAATCGCTAGTAATCGCGGATC +AGCACGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCAAAGTCGGT \ No newline at end of file diff --git a/bio/emu/abundance/test/short_read_R1.fq b/bio/emu/abundance/test/short_read_R1.fq new file mode 100644 index 0000000000..06c5874275 --- /dev/null +++ b/bio/emu/abundance/test/short_read_R1.fq @@ -0,0 +1,16 @@ +@SRR10391187.1 1 length=293 +GTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGNTNNNNNNCGGTTCCTTAAGNNTGANNNNNNANCCCCCGGCTNNNNNNNGGAGNGTCNNNGGAANCTNNGGAACTTGAGTGCAGAAGAGGANNNNNNNNTNCNNNGTGTAGCNNNNNNNTGCGTAGAGATGTGNNNNNNCACCAGTGNNNANNNNGACTCTNNNNNNNGTAANTGNNNNTGNGNANCNAANNNNNNGGGAGCGNNNNNNNTTAGATANNNNNNNAGTACA ++SRR10391187.1 1 length=293 +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG#:######::CDFEGGGGGG##:CC######:#::?FGGGGF#######::BF#:BC###::BF#:B##::+BFFFGGFGGGGGFFGGGGF########6#6###86>FFGD#######66=CCEGCG?CGFF######*43BFGGC###3####*3/1;+#######*2;C#22####11#1#*#0#22######1131FE@#######(.04:A<#######(--(06 +@SRR10391187.2 2 length=293 
+GTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGNGNNNNNNCGGTTTTTTAAGTNTGANNNNANAGCCCACGGCTNNNNNNNGGAGNGTCNNNGGAANCTNNAAAACTTGAGTGCAGAAGAGGANNNNNNNNTNCNNNGTGTAGCNNNNNNNTGCGCAGAGATATGGNNNNACACCAGTGNNGANNNNGACTTTNNNNNNNGTAANTGNNNNTGNTNTNCNAANNNNNNGGGATCANNNNGNNTTAGATANNNNNNNAGTCCA ++SRR10391187.2 2 length=293 +CCCCCGGGGGGGGGGGGGGGGGGFGGGFGGGGGGGGGGGGGGGGGGGGGGGDGFGGGGGGGGG#:######::DFDGGGGGGGG#:CD####:#:BFDGGGGGGG#######::DF#:BB###:8>D#::##8:=FEGGGGGGGGGGGGGACFG########6#6###86@FFGC#######*6>FGGGGGGGFGGG####31=CFGGGF##33####13=C>F#######/2;C#**####*1#*#/#.#22######1186*8>####0##(.08?F<#######(-4:FF +@SRR10391187.3 3 length=295 +GTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGNANNNNNGCGGTCTGTCAAGTNGGANNNNAAATCCCCGGGCTNNNNNNNGGAACTGCNTTCGAAACTNNCAGGCTTGAGTCTTGTAGAGGGNNNGNGNATNCNNNGTGTAGCNNNNNNNAGCGTAGAGATCGGGANNNATACCGGTGGNGANNGCGGCCCCNNNNANNAAGAATGANGCTCAGNTGCGAANNNNNNGGGAGCANNNNGGATTAGATANNNNNNTAGCCCACG ++SRR10391187.3 3 length=295 +CCCCCGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGG#:#####::CDEGGGGGGGDG#:9C####::BDFGGGGGGGF#######:8BFGGFG#:BCFFGGG9##::>CGGGGGGGFGGGGGGGGGG###:#6#65#5###861>FF,#######**6@BCB>@CE9**43###341CF=DGC@#13##22**;CGG####*##22;**0+0#*19CF4#*/.:/C######/138C>@####00-4;4<:9:######)((-2FGGGGGFGGCGGGGGFFGG###:#6#6@FF###86DFGCFG######*6=CCCCEGGD,,2?+#/*3BEF>FGGGG58EGGGFGGGGGGG?#2:7F793556384,)-+#217CFFA<8A*6>< \ No newline at end of file diff --git a/bio/emu/abundance/test/short_read_R2.fq b/bio/emu/abundance/test/short_read_R2.fq new file mode 100644 index 0000000000..ffa6936601 --- /dev/null +++ b/bio/emu/abundance/test/short_read_R2.fq @@ -0,0 +1,16 @@ +@SRR10391187.1 1 length=301 +NANCNCGAGCTGACGANAANNNNNNNCCACCTGTCACTCTGCCCCCGANNNNGACGTNCTNTCTCTAGGATTGTNAGNGGANGTCAAGANNNNNNNAGGTTCTNNNNNNNGCTTCGAATTAAACCACANNNTNCACCGCTTGTGCGNGNCCCCGTCAATNNCTNNNAGTTTCAGTCNTGNNACCGTACTCCCCAGNCGGAGTGCTTAATGCGTTAGCTGCAGCACTAAGGGGCGGAANNNCCCTAACACTNAGCNNNCNTNNTTTNNGGCNGGGAGNANCCNNNGATCTAATCCTGTTTNN ++SRR10391187.1 1 
"""Snakemake wrapper for ``emu abundance``.

Runs emu inside a throw-away directory and then moves only the files the
rule actually declared as outputs to their final destinations.
"""

__author__ = "Curro Campuzano Jimenez"
__copyright__ = "Copyright 2024, Curro Campuzano Jimenez"
__email__ = "campuzanocurro@gmail.com"
__license__ = "MIT"


from snakemake.shell import shell
import tempfile
import os

# Send both stdout and stderr of emu to the rule's log file.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
extra = snakemake.params.get("extra", "")

# The database input is optional; ``--db`` is only added when the rule
# declares it. The ``:q`` spec makes Snakemake shell-quote the path.
db_option = " --db {snakemake.input.db:q}" if snakemake.input.get("db", "") else ""

# Files requested from emu under the fixed basename "output", keyed by the
# named output of the rule they correspond to.
EMU_RESULT_FILES = {
    "abundances": "output_rel-abundance.tsv",
    "alignments": "output_emu_alignments.sam",
    "unclassified": "output_unclassified.fa",
}

with tempfile.TemporaryDirectory() as tmpdir:
    # Every path is interpolated with ``:q`` so that names containing spaces
    # or shell metacharacters are passed to emu as single arguments (a list
    # of reads is quoted element by element). ``extra`` is deliberately left
    # unquoted: it is a string of several user-supplied flags.
    shell(
        "emu abundance {snakemake.input.reads:q}"
        + db_option
        + " --keep-files --output-dir {tmpdir:q}"
        " --output-basename output --output-unclassified"
        " --threads {snakemake.threads}"
        " {extra}"
        " {log}"
    )
    # Only move what the rule asked for; everything else is discarded with
    # the temporary directory.
    for output_name, file_name in EMU_RESULT_FILES.items():
        if destination := snakemake.output.get(output_name):
            produced = os.path.join(tmpdir, file_name)
            shell("mv {produced:q} {destination:q}")
+https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.28.1-hd590300_0.conda#dcde58ff9a1f30b0037a2315d1846d1f +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-ha4646dd_5.conda#7a6bd7a12a4bd359e2afe6c0fa1acace +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a +https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.2-h2797004_0.conda#866983a220e27a80cb75e85cb30466a1 
+https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/bioconda/linux-64/bioawk-1.0-he4a0461_10.tar.bz2#3f4ea155f59ae781753ea76571e8564a +https://conda.anaconda.org/bioconda/linux-64/k8-0.2.5-hdcf5f25_4.tar.bz2#d3c49a96ae45864706037702775ca7c2 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 +https://conda.anaconda.org/conda-forge/linux-64/python-3.10.14-hd12c33a_0_cpython.conda#2b4ba962994e8bd4be9ff5b64b75aff2 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.7.1-hca28451_0.conda#755c7f876815003337d2c61ff5d047e5 +https://conda.anaconda.org/bioconda/linux-64/minimap2-2.28-he4a0461_0.tar.bz2#27d83cfe6bca3eb50aaeb6334371122d +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.2.0-pyhd8ed1ab_0.conda#da214ecd521a720a9d521c68047682dc +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/pathlib2-2.3.7.post1-py310hff52083_3.conda#62d26790749f62b9329425c901d93c3a +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/bioconda/linux-64/pysam-0.22.0-py310h41dec4a_1.tar.bz2#19fdb9301a6debbb7fe9836670e3feb7 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/noarch/flatten-dict-0.4.2-pyhd8ed1ab_1.tar.bz2#ccfb30b92adfeb283d4dcae3d0b6441b +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py310hb13e2d6_0.conda#6593de64c935768b6bad3e19b3e978be +https://conda.anaconda.org/conda-forge/linux-64/biopython-1.83-py310h2372a71_0.conda#0128595946cebfaaf212cc45d4b9cd3c +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.1-py310hcc13569_0.conda#cf5d315e3601a6a2931f63aa9a84dc40 +https://conda.anaconda.org/bioconda/noarch/emu-3.4.5-hdfd78af_0.tar.bz2#34b067c6f82c3796a40e1b0ecaf094d3 diff --git a/bio/emu/collapse-taxonomy/environment.yaml b/bio/emu/collapse-taxonomy/environment.yaml new file mode 100644 index 0000000000..a117676039 --- /dev/null +++ b/bio/emu/collapse-taxonomy/environment.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - emu=3.4.5 \ No newline at end of file diff --git 
"""Snakemake wrapper for ``emu collapse-taxonomy``.

Collapses an emu abundance table at the requested taxonomic rank and moves
the result to the rule's single declared output.
"""

__author__ = "Curro Campuzano Jimenez"
__copyright__ = "Copyright 2024, Curro Campuzano Jimenez"
__email__ = "campuzanocurro@gmail.com"
__license__ = "MIT"


from snakemake.shell import shell
import tempfile
import os

# Send both stdout and stderr of emu to the rule's log file.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

input_file = snakemake.input[0]
output_file = snakemake.output[0]
# Collapse at species level unless the rule sets params.rank.
rank = snakemake.params.get("rank", "species")

with tempfile.TemporaryDirectory() as tmpdir:
    # Resolve symlinks so the link created below points at the real file.
    input_file_path = os.path.realpath(input_file)
    input_basename = os.path.basename(input_file_path)

    # Expose the input inside the temporary directory. This wrapper expects
    # emu to write "<input stem>-<rank>.tsv" next to the path it is given
    # (NOTE(review): naming taken from the original wrapper - confirm against
    # the emu version in use), which keeps the result out of the input's
    # real directory.
    symlink_path = os.path.join(tmpdir, input_basename)
    os.symlink(input_file_path, symlink_path)

    # ``:q`` shell-quotes each value so paths with spaces or shell
    # metacharacters are passed as single arguments.
    shell("emu collapse-taxonomy {symlink_path:q} {rank:q} {log}")

    # Input file name without its extension; the result is always a TSV.
    name = os.path.splitext(input_basename)[0]
    temp_out = os.path.join(tmpdir, f"{name}-{rank}.tsv")
    shell("mv {temp_out:q} {output_file:q}")
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda#d211c42b9ce49aee3734fdc828731689 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_5.conda#d4ff227c46917d3b4565302a2bbb276b +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.28.1-hd590300_0.conda#dcde58ff9a1f30b0037a2315d1846d1f +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-ha4646dd_5.conda#7a6bd7a12a4bd359e2afe6c0fa1acace +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4.20240210-h59595ed_0.conda#97da8860a0da5413c7c98a3b3838a645 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.2.1-hd590300_1.conda#9d731343cff6ee2e5a25c4a091bf8e2a 
+https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.2-h2797004_0.conda#866983a220e27a80cb75e85cb30466a1 +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/bioconda/linux-64/bioawk-1.0-he4a0461_10.tar.bz2#3f4ea155f59ae781753ea76571e8564a +https://conda.anaconda.org/bioconda/linux-64/k8-0.2.5-hdcf5f25_4.tar.bz2#d3c49a96ae45864706037702775ca7c2 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 +https://conda.anaconda.org/conda-forge/linux-64/python-3.10.14-hd12c33a_0_cpython.conda#2b4ba962994e8bd4be9ff5b64b75aff2 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.7.1-hca28451_0.conda#755c7f876815003337d2c61ff5d047e5 
+https://conda.anaconda.org/bioconda/linux-64/minimap2-2.28-he4a0461_0.tar.bz2#27d83cfe6bca3eb50aaeb6334371122d +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.2.0-pyhd8ed1ab_0.conda#da214ecd521a720a9d521c68047682dc +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/pathlib2-2.3.7.post1-py310hff52083_3.conda#62d26790749f62b9329425c901d93c3a +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/bioconda/linux-64/pysam-0.22.0-py310h41dec4a_1.tar.bz2#19fdb9301a6debbb7fe9836670e3feb7 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/noarch/flatten-dict-0.4.2-pyhd8ed1ab_1.tar.bz2#ccfb30b92adfeb283d4dcae3d0b6441b +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py310hb13e2d6_0.conda#6593de64c935768b6bad3e19b3e978be +https://conda.anaconda.org/conda-forge/linux-64/biopython-1.83-py310h2372a71_0.conda#0128595946cebfaaf212cc45d4b9cd3c 
+https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.1-py310hcc13569_0.conda#cf5d315e3601a6a2931f63aa9a84dc40 +https://conda.anaconda.org/bioconda/noarch/emu-3.4.5-hdfd78af_0.tar.bz2#34b067c6f82c3796a40e1b0ecaf094d3 diff --git a/bio/emu/combine-outputs/environment.yaml b/bio/emu/combine-outputs/environment.yaml new file mode 100644 index 0000000000..a117676039 --- /dev/null +++ b/bio/emu/combine-outputs/environment.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - emu=3.4.5 \ No newline at end of file diff --git a/bio/emu/combine-outputs/meta.yaml b/bio/emu/combine-outputs/meta.yaml new file mode 100644 index 0000000000..3ab6e95d92 --- /dev/null +++ b/bio/emu/combine-outputs/meta.yaml @@ -0,0 +1,15 @@ +name: emu combine-outputs +description: Combine individual TSV abundance tables into a single TSV at the desired taxonomic rank. +url: https://github.com/treangenlab/emu +authors: + - Curro Campuzano +input: + - A list of TSV files obtained with emu abundance. +output: + - abundances: TSV file containing the abundance of different taxa. + - taxonomy: TSV file containing the taxonomy (optional; otherwise, taxonomy will be included in the abundance table). +params: + rank: Accepted ranks are 'tax_id', 'species', 'genus', 'family', 'order', 'class', 'phylum' and 'superkingdom'. If omitted, no agglomeration will be done (that is, the default is 'tax_id'). + extra: Extra arguments (such as '--counts'). +note: The sample columns in the final table will be named after each input filename without its extension. If a filename ends with "_rel-abundance.tsv", the word '_rel-abundance' will be removed too (for consistency with the program). 
+ diff --git a/bio/emu/combine-outputs/test/Snakefile b/bio/emu/combine-outputs/test/Snakefile new file mode 100644 index 0000000000..65973721a0 --- /dev/null +++ b/bio/emu/combine-outputs/test/Snakefile @@ -0,0 +1,24 @@ +rule combine_outputs: + input: + expand("{sample}_rel-abundance.tsv", sample=["sample1", "sample2"]), + output: + abundances="combined_abundances.tsv", + log: + "logs/emu/combined_abundances.log", + wrapper: + "master/bio/emu/combine-outputs" + + +rule combine_outputs_split: + input: + expand("{sample}_rel-abundance.txt", sample=["sample1", "sample2"]), + output: + abundances="counts.tsv", + taxonomy="taxonomy.tsv", + log: + "logs/emu/combined_split.log", + params: + rank="genus", + extra="--counts", + wrapper: + "master/bio/emu/combine-outputs" diff --git a/bio/emu/combine-outputs/test/sample1_rel-abundance.tsv b/bio/emu/combine-outputs/test/sample1_rel-abundance.tsv new file mode 100644 index 0000000000..fd065e577e --- /dev/null +++ b/bio/emu/combine-outputs/test/sample1_rel-abundance.tsv @@ -0,0 +1,3 @@ +tax_id abundance superkingdom phylum class order family genus species estimated counts +1 1.0 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae Pseudomonas amygdali; 2.0 +unassigned 0.0 2.0 diff --git a/bio/emu/combine-outputs/test/sample1_rel-abundance.txt b/bio/emu/combine-outputs/test/sample1_rel-abundance.txt new file mode 120000 index 0000000000..6fb3595c9e --- /dev/null +++ b/bio/emu/combine-outputs/test/sample1_rel-abundance.txt @@ -0,0 +1 @@ +sample1_rel-abundance.tsv \ No newline at end of file diff --git a/bio/emu/combine-outputs/test/sample2_rel-abundance.tsv b/bio/emu/combine-outputs/test/sample2_rel-abundance.tsv new file mode 100644 index 0000000000..61af5e4f0a --- /dev/null +++ b/bio/emu/combine-outputs/test/sample2_rel-abundance.tsv @@ -0,0 +1,3 @@ +tax_id abundance superkingdom phylum class order family genus species +1 1.0 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae 
Pseudomonas amygdali; +unassigned 0.0 diff --git a/bio/emu/combine-outputs/test/sample2_rel-abundance.txt b/bio/emu/combine-outputs/test/sample2_rel-abundance.txt new file mode 120000 index 0000000000..c74fd5a73f --- /dev/null +++ b/bio/emu/combine-outputs/test/sample2_rel-abundance.txt @@ -0,0 +1 @@ +sample2_rel-abundance.tsv \ No newline at end of file diff --git a/bio/emu/combine-outputs/wrapper.py b/bio/emu/combine-outputs/wrapper.py new file mode 100644 index 0000000000..e1d62ea787 --- /dev/null +++ b/bio/emu/combine-outputs/wrapper.py @@ -0,0 +1,43 @@ +__author__ = "Curro Campuzano Jimenez" +__copyright__ = "Copyright 2024, Curro Campuzano Jimenez" +__email__ = "campuzanocurro@gmail.com" +__license__ = "MIT" + + +from snakemake.shell import shell +import tempfile +import os + +log = snakemake.log_fmt_shell(stdout=True, stderr=True) +extra = snakemake.params.get("extra", "") + +taxonomy = snakemake.output.get("taxonomy", "") +abundances = snakemake.output.get("abundances", "") +if taxonomy and abundances: + split = True + extra += " --split-tables" +else: + split = False + +rank = snakemake.params.get("rank", "tax_id") +counts = "--counts" in extra + + +with tempfile.TemporaryDirectory() as tmpdir: + for infile in snakemake.input: + # Link names inside tmpdir must end in "rel-abundance.tsv"; link the resolved path so relative inputs do not dangle + temp = os.path.join(tmpdir, os.path.basename(infile)) + if not temp.endswith("rel-abundance.tsv"): + temp = os.path.splitext(temp)[0] + "_rel-abundance.tsv" + os.symlink(os.path.realpath(infile), temp) + shell("emu combine-outputs {tmpdir} {rank} {extra} {log}") + if split and counts: + shell("mv {tmpdir}/emu-combined-taxonomy-{rank}.tsv {taxonomy}") + shell("mv {tmpdir}/emu-combined-abundance-{rank}-counts.tsv {abundances}") + elif split and not counts: + shell("mv {tmpdir}/emu-combined-taxonomy-{rank}.tsv {taxonomy}") + shell("mv {tmpdir}/emu-combined-abundance-{rank}.tsv {abundances}") + elif not split and counts: + shell("mv {tmpdir}/emu-combined-{rank}-counts.tsv {abundances}") + elif 
not split and not counts: + shell("mv {tmpdir}/emu-combined-{rank}.tsv {abundances}") diff --git a/test.py b/test.py index 87ff793dd5..99858ae0b1 100644 --- a/test.py +++ b/test.py @@ -3547,6 +3547,7 @@ def test_multiqc_a(): ["snakemake", "--cores", "1", "qc/multiqc.a.html", "--use-conda", "-F"], ) + @skip_if_not_modified def test_multiqc_config(): run( @@ -5068,7 +5069,6 @@ def test_gatk_mutect(): "bio/gatk/mutect", ["snakemake", "--cores", "1", "variant_list/a_b.vcf", "--use-conda", "-F"], ) - @skip_if_not_modified @@ -5355,6 +5355,7 @@ def test_ucsc_genepredtobed(): ["snakemake", "--cores", "1", "annotation.bed", "--use-conda", "-F"], ) + @skip_if_not_modified def test_ucsc_fatotwobit(): run( @@ -6171,6 +6172,7 @@ def test_generate_data_matrix(): ["snakemake", "--cores", "1", "--use-conda", "-F"], ) + @skip_if_not_modified def test_rseqc_infer_experiment(): run( @@ -6433,6 +6435,7 @@ def test_sortmerna_se(): ], ) + @skip_if_not_modified def test_tmb_pytmb(): run( @@ -6440,9 +6443,88 @@ def test_tmb_pytmb(): ["snakemake", "--cores", "1", "--use-conda", "-F"], ) + @skip_if_not_modified def test_root_hadd(): run( "phys/root/hadd", ["snakemake", "--cores", "2", "--use-conda", "-F"], ) + + +@skip_if_not_modified +def test_emu_abundance(): + run( + "bio/emu/abundance", + [ + "snakemake", + "--cores", + "1", + "sample_rel-abundance.tsv", + "sample_emu_alignments.sam", + "sample_unclassified.fa", + "--use-conda", + "-F", + ], + ) + + +@skip_if_not_modified +def test_emu_abundance_paired(): + run( + "bio/emu/abundance", + [ + "snakemake", + "--cores", + "1", + "short_read_rel-abundance_paired.tsv", + "--use-conda", + "-F", + ], + ) + + +@skip_if_not_modified +def test_emu_collapse_taxonomy(): + run( + "bio/emu/collapse-taxonomy", + [ + "snakemake", + "--cores", + "1", + "full_length_rel-abundance_collapsed.tsv", + "--use-conda", + "-F", + ], + ) + + +@skip_if_not_modified +def test_emu_combine_output(): + run( + "bio/emu/combine-outputs", + [ + "snakemake", + "--cores", 
+ "1", + "combined_abundances.tsv", + "--use-conda", + "-F", + ], + ) + + +@skip_if_not_modified +def test_emu_combine_output_split(): + run( + "bio/emu/combine-outputs", + [ + "snakemake", + "--cores", + "1", + "counts.tsv", + "taxonomy.tsv", + "--use-conda", + "-F", + ], + )