Skip to content

Commit

Permalink
feat: added GATK variantAnnotator wrapper (#462)
Browse files Browse the repository at this point in the history
* Added GATK variantAnnotator wrapper

* BGziped VCF

* Fixed typo

* Added intervals to example Snakefile
  • Loading branch information
fgvieira committed Mar 16, 2022
1 parent 429f943 commit 3ace5fa
Show file tree
Hide file tree
Showing 14 changed files with 121 additions and 1 deletion.
7 changes: 7 additions & 0 deletions bio/gatk/variantannotator/environment.yaml
@@ -0,0 +1,7 @@
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- gatk4 =4.2
- snakemake-wrapper-utils =0.3
16 changes: 16 additions & 0 deletions bio/gatk/variantannotator/meta.yaml
@@ -0,0 +1,16 @@
name: gatk VariantAnnotator
description: |
Run gatk VariantAnnotator.
authors:
- Filipe G. Vieira
input:
- VCF file
- BAM file
- reference genome
- VCF of known variation
output:
- annotated VCF file
notes: |
* The `java_opts` param allows for additional arguments to be passed to the java compiler, e.g. "-XX:ParallelGCThreads=10" (not for `-XmX` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* For more information see, https://gatk.broadinstitute.org/hc/en-us/articles/4418054223003-VariantAnnotator
18 changes: 18 additions & 0 deletions bio/gatk/variantannotator/test/Snakefile
@@ -0,0 +1,18 @@
rule gatk_annotator:
input:
vcf="calls/snvs.vcf.gz",
aln="mapped/a.bam",
ref="genome.fasta",
db="calls/snvs.vcf.gz",
# intervals="targets.bed",
output:
vcf="snvs.annot.vcf",
log:
"logs/gatk/annotator/snvs.log",
params:
extra="--resource-allele-concordance -A Coverage --expression db.END",
java_opts="", # optional
resources:
mem_mb=1024,
wrapper:
"master/bio/gatk/variantannotator"
Binary file added bio/gatk/variantannotator/test/calls/snvs.vcf.gz
Binary file not shown.
Binary file added bio/gatk/variantannotator/test/calls/snvs.vcf.gz.tbi
Binary file not shown.
3 changes: 3 additions & 0 deletions bio/gatk/variantannotator/test/genome.dict
@@ -0,0 +1,3 @@
@HD VN:1.5
@SQ SN:ref LN:45 M5:7a66cae8ab14aef8d635bc80649e730b UR:file:/home/johannes/scms/snakemake-wrappers/bio/picard/createsequencedictionary/test/genome.fasta
@SQ SN:ref2 LN:40 M5:1636753510ec27476fdd109a6684680e UR:file:/home/johannes/scms/snakemake-wrappers/bio/picard/createsequencedictionary/test/genome.fasta
4 changes: 4 additions & 0 deletions bio/gatk/variantannotator/test/genome.fasta
@@ -0,0 +1,4 @@
>ref
AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
>ref2
aggttttataaaacaattaagtctacagagcaactacgcg
2 changes: 2 additions & 0 deletions bio/gatk/variantannotator/test/genome.fasta.fai
@@ -0,0 +1,2 @@
ref 45 5 45 46
ref2 40 57 40 41
Binary file added bio/gatk/variantannotator/test/mapped/a.bam
Binary file not shown.
Binary file added bio/gatk/variantannotator/test/mapped/a.bam.bai
Binary file not shown.
52 changes: 52 additions & 0 deletions bio/gatk/variantannotator/wrapper.py
@@ -0,0 +1,52 @@
__author__ = "Filipe G. Vieira"
__copyright__ = "Copyright 2022, Filipe G. Vieira"
__license__ = "MIT"

import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


input = snakemake.input.get("aln", "")
if input:
input = f"--input {input}"

reference = snakemake.input.get("ref", "")
if reference:
reference = f"--reference {reference}"

dbsnp = snakemake.input.get("known", "")
if dbsnp:
dbsnp = f"--dbsnp {dbsnp}"

intervals = snakemake.input.get("intervals", "")
if not intervals:
intervals = snakemake.params.get("intervals", "")
if intervals:
intervals = "--intervals {}".format(intervals)

resources = [
f"--resource:{name} {file}"
for name, file in snakemake.input.items()
if name not in ["vcf", "aln", "ref", "known", "intervals"]
]


with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' VariantAnnotator"
" --variant {snakemake.input.vcf}"
" {input}"
" {reference}"
" {dbsnp}"
" {intervals}"
" {resources}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.vcf}"
" {log}"
)
1 change: 1 addition & 0 deletions bio/gatk/variantfiltration/test/Snakefile
Expand Up @@ -2,6 +2,7 @@ rule gatk_filter:
input:
vcf="calls/snvs.vcf",
ref="genome.fasta",
# intervals="targets.bed",
output:
vcf="calls/snvs.filtered.vcf",
log:
Expand Down
11 changes: 10 additions & 1 deletion bio/gatk/variantfiltration/wrapper.py
Expand Up @@ -9,20 +9,29 @@

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


filters = [
"--filter-name {} --filter-expression '{}'".format(name, expr.replace("'", "\\'"))
for name, expr in snakemake.params.filters.items()
]

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

intervals = snakemake.input.get("intervals", "")
if not intervals:
intervals = snakemake.params.get("intervals", "")
if intervals:
intervals = "--intervals {}".format(intervals)


with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' VariantFiltration"
" --variant {snakemake.input.vcf}"
" --reference {snakemake.input.ref}"
" {filters}"
" {intervals}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.vcf}"
Expand Down
8 changes: 8 additions & 0 deletions test.py
Expand Up @@ -3133,6 +3133,14 @@ def test_gatk_selectvariants():
)


@skip_if_not_modified
def test_gatk_variantannotator():
run(
"bio/gatk/variantannotator",
["snakemake", "--cores", "1", "snvs.annot.vcf", "--use-conda", "-F"],
)


@skip_if_not_modified
def test_gatk_variantfiltration():
run(
Expand Down

0 comments on commit 3ace5fa

Please sign in to comment.