From 754dfc1019a7eb52642e3f3e91c3df9da8df3495 Mon Sep 17 00:00:00 2001
From: "Filipe G. Vieira"
Date: Tue, 16 Aug 2022 15:58:21 +0200
Subject: [PATCH] feat: Quast wrapper (#525)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Added quast wrapper

* Fixed typos

* Added more read options

* Updated URL

* Update bio/quast/environment.yaml

Co-authored-by: Johannes Köster
---
 bio/quast/environment.yaml  |  6 +++
 bio/quast/meta.yaml         | 31 +++++++++++++++
 bio/quast/test/Snakefile    | 48 ++++++++++++++++++++++++
 bio/quast/test/genome.fasta |  2 +
 bio/quast/wrapper.py        | 75 +++++++++++++++++++++++++++++++++++++
 test.py                     |  9 +++++
 6 files changed, 171 insertions(+)
 create mode 100644 bio/quast/environment.yaml
 create mode 100644 bio/quast/meta.yaml
 create mode 100644 bio/quast/test/Snakefile
 create mode 100644 bio/quast/test/genome.fasta
 create mode 100644 bio/quast/wrapper.py

diff --git a/bio/quast/environment.yaml b/bio/quast/environment.yaml
new file mode 100644
index 0000000000..3e219c289a
--- /dev/null
+++ b/bio/quast/environment.yaml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - quast =5.2
diff --git a/bio/quast/meta.yaml b/bio/quast/meta.yaml
new file mode 100644
index 0000000000..7d49b8a811
--- /dev/null
+++ b/bio/quast/meta.yaml
@@ -0,0 +1,31 @@
+name: QuAsT
+description: |
+  Quality Assessment Tool for Genome Assemblies
+url: https://github.com/ablab/quast
+authors:
+  - Filipe G. Vieira
+input:
+  - Sequences in FASTA format
+  - Reference genome (optional)
+  - GFF (optional)
+  - Paired end read (optional)
+  - Mate-pair reads (optional)
+  - Unpaired reads (optional)
+  - PacBio SMRT reads (optional)
+  - Oxford Nanopore reads (optional)
+  - Mapped reads against the reference in SAM/BAM (optional)
+  - Mapped reads against each of the assemblies in SAM/BAM (same order; optional)
+  - Structural variants in BEDPE (optional)
+output:
+  - Assessment summary in plain text format
+  - Tab-separated version of the summary
+  - LaTeX version of the summary
+  - Icarus main menu with links to interactive viewers
+  - PDF report of all plots combined with all tables
+  - HTML version of the report with interactive plots inside
+  - Report on misassemblies
+  - Report on unaligned and partially unaligned contigs
+  - Report on k-mer-based metrics
+  - Report on mapped reads statistics.
+notes: |
+  * The `extra` param allows for additional program arguments.
diff --git a/bio/quast/test/Snakefile b/bio/quast/test/Snakefile
new file mode 100644
index 0000000000..41276ccf12
--- /dev/null
+++ b/bio/quast/test/Snakefile
@@ -0,0 +1,48 @@
+rule quast:
+    input:
+        fasta="genome.fasta",
+        ref="genome.fasta",
+        #gff="annotations.gff",
+        #pe1="reads_R1.fastq",
+        #pe2="reads_R2.fastq",
+        #pe12="reads.fastq",
+        #mp1="matereads_R1.fastq",
+        #mp2="matereads_R2.fastq",
+        #mp12="matereads.fastq",
+        #single="single.fastq",
+        #pacbio="pacbio.fas",
+        #nanopore="nanopore.fastq",
+        #ref_bam="ref.bam",
+        #ref_sam="ref.sam",
+        #bam=["s1.bam","s2.bam"],
+        #sam=["s1.sam","s2.sam"],
+        #sv_bedpe="sv.bed",
+    output:
+        multiext("{sample}/report.", "html", "tex", "txt", "pdf", "tsv"),
+        multiext("{sample}/transposed_report.", "tex", "txt", "tsv"),
+        multiext(
+            "{sample}/basic_stats/",
+            "cumulative_plot.pdf",
+            "GC_content_plot.pdf",
+            "gc.icarus.txt",
+            "genome_GC_content_plot.pdf",
+            "NGx_plot.pdf",
+            "Nx_plot.pdf",
+        ),
+        multiext(
+            "{sample}/contigs_reports/",
+            "all_alignments_genome.tsv",
+            "contigs_report_genome.mis_contigs.info",
+            "contigs_report_genome.stderr",
+            "contigs_report_genome.stdout",
+        ),
+        "{sample}/contigs_reports/minimap_output/genome.coords_tmp",
+        "{sample}/icarus.html",
+        "{sample}/icarus_viewers/contig_size_viewer.html",
+        "{sample}/quast.log",
+    log:
+        "logs/{sample}.quast.log",
+    params:
+        extra="--min-contig 5 --min-identity 95.0",
+    wrapper:
+        "master/bio/quast"
diff --git a/bio/quast/test/genome.fasta b/bio/quast/test/genome.fasta
new file mode 100644
index 0000000000..11d25dda64
--- /dev/null
+++ b/bio/quast/test/genome.fasta
@@ -0,0 +1,2 @@
+>Sheila
+GCTAGCTCAGAAAAAAAAAA
diff --git a/bio/quast/wrapper.py b/bio/quast/wrapper.py
new file mode 100644
index 0000000000..6f28742b56
--- /dev/null
+++ b/bio/quast/wrapper.py
@@ -0,0 +1,75 @@
+__author__ = "Filipe G. Vieira"
+__copyright__ = "Copyright 2022, Filipe G. Vieira"
+__license__ = "MIT"
+
+
+import os
+from snakemake.shell import shell
+
+
+extra = snakemake.params.get("extra", "")
+log = snakemake.log_fmt_shell(stdout=True, stderr=True)
+
+
+ref = snakemake.input.get("ref", "")
+if ref:
+    ref = f"-r {ref}"
+
+gff = snakemake.input.get("gff", "")
+if gff:
+    gff = f"--features {gff}"
+
+pe1 = snakemake.input.get("pe1", "")
+if pe1:
+    pe1 = f"--pe1 {pe1}"
+pe2 = snakemake.input.get("pe2", "")
+if pe2:
+    pe2 = f"--pe2 {pe2}"
+pe12 = snakemake.input.get("pe12", "")
+if pe12:
+    pe12 = f"--pe12 {pe12}"
+mp1 = snakemake.input.get("mp1", "")
+if mp1:
+    mp1 = f"--mp1 {mp1}"
+mp2 = snakemake.input.get("mp2", "")
+if mp2:
+    mp2 = f"--mp2 {mp2}"
+mp12 = snakemake.input.get("mp12", "")
+if mp12:
+    mp12 = f"--mp12 {mp12}"
+single = snakemake.input.get("single", "")
+if single:
+    single = f"--single {single}"
+pacbio = snakemake.input.get("pacbio", "")
+if pacbio:
+    pacbio = f"--pacbio {pacbio}"
+nanopore = snakemake.input.get("nanopore", "")
+if nanopore:
+    nanopore = f"--nanopore {nanopore}"
+ref_bam = snakemake.input.get("ref_bam", "")
+if ref_bam:
+    ref_bam = f"--ref-bam {ref_bam}"
+ref_sam = snakemake.input.get("ref_sam", "")
+if ref_sam:
+    ref_sam = f"--ref-sam {ref_sam}"
+bam = snakemake.input.get("bam", "")
+if bam:
+    if isinstance(bam, list):
+        bam = ",".join(bam)
+    bam = f"--bam {bam}"
+sam = snakemake.input.get("sam", "")
+if sam:
+    if isinstance(sam, list):
+        sam = ",".join(sam)
+    sam = f"--sam {sam}"
+sv_bedpe = snakemake.input.get("sv_bedpe", "")
+if sv_bedpe:
+    sv_bedpe = f"--sv-bedpe {sv_bedpe}"
+
+
+output_dir = os.path.commonpath(snakemake.output)
+
+
+shell(
+    "quast --threads {snakemake.threads} {ref} {gff} {pe1} {pe2} {pe12} {mp1} {mp2} {mp12} {single} {pacbio} {nanopore} {ref_bam} {ref_sam} {bam} {sam} {sv_bedpe} {extra} -o {output_dir} {snakemake.input.fasta} {log}"
+)
diff --git a/test.py b/test.py
index 9d6fb8ce68..9313958599 100644
--- a/test.py
+++ b/test.py
@@ -139,6 +139,15 @@ def run(wrapper, cmd, check_log=None):
             os.chdir(origdir)
 
 
+
+@skip_if_not_modified
+def test_quast():
+    run(
+        "bio/quast",
+        ["snakemake", "--cores", "1", "a/quast.log", "--use-conda", "-F"],
+    )
+
+
 @skip_if_not_modified
 def test_gfatools():
     run(