diff --git a/bio/gatk/filtermutectcalls/meta.yaml b/bio/gatk/filtermutectcalls/meta.yaml index d4958046ba..20f5e4cbaa 100644 --- a/bio/gatk/filtermutectcalls/meta.yaml +++ b/bio/gatk/filtermutectcalls/meta.yaml @@ -1,14 +1,22 @@ name: gatk FilterMutectCalls +url: https://gatk.broadinstitute.org/hc/en-us/articles/360042476952-FilterMutectCalls description: | - Run gatk FilterMutectCalls. + Run gatk FilterMutectCalls to filter variants in a Mutect2 VCF callset. authors: - Patrik Smeds - Filipe G. Vieira + - Thibault Dayris input: - - vcf file - - reference genome + - vcf: Path to vcf file (pbgzipped, indexed) + - ref: Path to reference genome (with .dict file alongside) + - aln: Optional path to SAM/BAM/CRAM files + - contamination: Optional path to contamination table (from gatk CalculateContamination) + - segmentation: Optional path to tumor segments + - f1r2: Optional path to prior artefact (tar.gz) + - intervals: Optional path to BED intervals file output: - - filtered vcf file + - vcf: filtered vcf file + - stats: Optional stats from Mutect2 notes: | * The `java_opts` param allows for additional arguments to be passed to the java compiler, e.g. "-XX:ParallelGCThreads=10" (not for `-XmX` or `-Djava.io.tmpdir`, since they are handled automatically). * The `extra` param allows for additional program arguments. 
diff --git a/bio/gatk/filtermutectcalls/test/Snakefile b/bio/gatk/filtermutectcalls/test/Snakefile index 4641ea33c2..2dd4ab929d 100644 --- a/bio/gatk/filtermutectcalls/test/Snakefile +++ b/bio/gatk/filtermutectcalls/test/Snakefile @@ -13,3 +13,25 @@ rule gatk_filtermutectcalls: mem_mb=1024, wrapper: "master/bio/gatk/filtermutectcalls" + + +rule gatk_filtermutectcalls_complete: + input: + vcf="calls/snvs.vcf", + ref="genome.fasta", + aln="mapped/a.bam", # optional reads, forwarded by the wrapper as --input + intervals="intervals.bed", # optional BED, forwarded by the wrapper as --intervals + # contamination="", # from gatk CalculateContamination + # segmentation="", # from gatk CalculateContamination + # f1r2="", # from gatk LearnReadOrientationBias + output: + vcf="calls/snvs.mutect.filtered.b.vcf", + log: + "logs/gatk/filter/snvs.log", + params: + extra="--max-alt-allele-count 3", # optional arguments, see GATK docs + java_opts="", # optional + resources: + mem_mb=1024, + wrapper: + "master/bio/gatk/filtermutectcalls" diff --git a/bio/gatk/filtermutectcalls/test/intervals.bed b/bio/gatk/filtermutectcalls/test/intervals.bed new file mode 100644 index 0000000000..df100f631f --- /dev/null +++ b/bio/gatk/filtermutectcalls/test/intervals.bed @@ -0,0 +1,2 @@ +ref 3 44 +ref2 8 31 diff --git a/bio/gatk/filtermutectcalls/test/mapped/a.bam b/bio/gatk/filtermutectcalls/test/mapped/a.bam new file mode 100644 index 0000000000..a407ae2040 Binary files /dev/null and b/bio/gatk/filtermutectcalls/test/mapped/a.bam differ diff --git a/bio/gatk/filtermutectcalls/test/mapped/a.bam.bai b/bio/gatk/filtermutectcalls/test/mapped/a.bam.bai new file mode 100644 index 0000000000..a9b9979fed Binary files /dev/null and b/bio/gatk/filtermutectcalls/test/mapped/a.bam.bai differ diff --git a/bio/gatk/filtermutectcalls/wrapper.py b/bio/gatk/filtermutectcalls/wrapper.py index 418b1854a3..881e8418ff 100644 --- a/bio/gatk/filtermutectcalls/wrapper.py +++ b/bio/gatk/filtermutectcalls/wrapper.py @@ -12,11 +12,37 @@ log = snakemake.log_fmt_shell(stdout=True, stderr=True) + +aln = snakemake.input.get("aln", "") 
+if aln: + aln = f"--input {aln}" + +contamination = snakemake.input.get("contamination", snakemake.input.get("contemination_table", "")) # documented key first; the misspelled key is kept only as a fallback +if contamination: + contamination = f"--contamination-table {contamination}" + +segmentation = snakemake.input.get("segmentation", "") +if segmentation: + segmentation = f"--tumor-segmentation {segmentation}" + +f1r2 = snakemake.input.get("f1r2", "") +if f1r2: + f1r2 = f"--orientation-bias-artifact-priors {f1r2}" + +intervals = snakemake.input.get("intervals", snakemake.input.get("bed", "")) # key used by the test rule first; "bed" is kept only as a fallback +if intervals: + intervals = f"--intervals {intervals}" + with tempfile.TemporaryDirectory() as tmpdir: shell( "gatk --java-options '{java_opts}' FilterMutectCalls" " --variant {snakemake.input.vcf}" " --reference {snakemake.input.ref}" + " {aln}" # BAM/SAM/CRAM file containing reads (empty when not provided) + " {contamination}" # Tables containing contamination information (empty when not provided) + " {segmentation}" # Tumor segments' minor allele fractions (empty when not provided) + " {f1r2}" # .tar.gz files containing tables of prior artifact (empty when not provided) + " {intervals}" # Genomic intervals over which to operate (empty when not provided) " {extra}" " --tmp-dir {tmpdir}" " --output {snakemake.output.vcf}" diff --git a/test.py b/test.py index 335ebb86b8..d03704d8a2 100644 --- a/test.py +++ b/test.py @@ -3290,6 +3290,18 @@ def test_gatk_filtermutectcalls(): ], ) + + run( + "bio/gatk/filtermutectcalls", + [ + "snakemake", + "--cores", + "1", + "calls/snvs.mutect.filtered.b.vcf", + "--use-conda", + "-F", + ], + ) + + @skip_if_not_modified def test_gatk_selectvariants():