Skip to content

Commit

Permalink
feat: added tmpdir to all GATK wrappers, plus some doc changes (#449)
Browse files Browse the repository at this point in the history
* Added tmpdir to all GATK wrappers, plus some doc changes

* Changed intervals to allow specification from both input and params

* Adding missing Google Cloud dependencies

* Added OpenJDK 8 to environment

* Fixed typo

* Force retest

* Fixed typos

* Several doc changes and tweaks

* Renamed Snakefiles

* Removed old Snakefile

* Added author
  • Loading branch information
fgvieira committed Feb 21, 2022
1 parent 86a6c11 commit c17266b
Show file tree
Hide file tree
Showing 80 changed files with 516 additions and 435 deletions.
4 changes: 2 additions & 2 deletions bio/gatk/applybqsr/environment.yaml
Expand Up @@ -3,6 +3,6 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.1.4.1
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils ==0.1.3
- snakemake-wrapper-utils =0.3
7 changes: 4 additions & 3 deletions bio/gatk/applybqsr/meta.yaml
Expand Up @@ -5,13 +5,14 @@ authors:
- Christopher Schröder
- Johannes Köster
- Jake VanCampen
- Filipe G. Vieira
input:
- bam file
- fasta reference
- BAM file
- FASTA reference
- recalibration table for the bam
output:
- recalibrated bam file
notes: |
* The `java_opts` param allows for additional arguments to be passed to the JVM, e.g. "-XX:ParallelGCThreads=10" (but not `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments for ApplyBQSR.
* The `extra` param allows for additional program arguments.
* For more information, see https://gatk.broadinstitute.org/hc/en-us/articles/360037055712-ApplyBQSR
10 changes: 5 additions & 5 deletions bio/gatk/applybqsr/test/Snakefile
Expand Up @@ -3,15 +3,15 @@ rule gatk_applybqsr:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
output:
bam="recal/{sample}.bam"
bam="recal/{sample}.bam",
log:
"logs/gatk/gatk_applybqsr/{sample}.log"
"logs/gatk/gatk_applybqsr/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
java_opts="", # optional
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/gatk/applybqsr"
19 changes: 13 additions & 6 deletions bio/gatk/applybqsr/wrapper.py
Expand Up @@ -4,16 +4,23 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)

log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True)
shell(
"gatk --java-options '{java_opts}' ApplyBQSR {extra} "
"-R {snakemake.input.ref} -I {snakemake.input.bam} "
"--bqsr-recal-file {snakemake.input.recal_table} "
"-O {snakemake.output.bam} {log}"
)

with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' ApplyBQSR"
" --input {snakemake.input.bam}"
" --bqsr-recal-file {snakemake.input.recal_table}"
" --reference {snakemake.input.ref}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.bam}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/gatk/applybqsrspark/environment.yaml
Expand Up @@ -5,4 +5,4 @@ channels:
dependencies:
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils =0.1.3
- snakemake-wrapper-utils =0.3
10 changes: 5 additions & 5 deletions bio/gatk/applybqsrspark/test/Snakefile
Expand Up @@ -3,18 +3,18 @@ rule gatk_applybqsr_spark:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
output:
bam="recal/{sample}.bam"
bam="recal/{sample}.bam",
log:
"logs/gatk/gatk_applybqsr_spark/{sample}.log"
"logs/gatk/gatk_applybqsr_spark/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
java_opts="", # optional
#spark_runner="", # optional, local by default
#spark_master="", # optional
#spark_extra="", # optional
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/gatk/applybqsrspark"
17 changes: 10 additions & 7 deletions bio/gatk/applybqsrspark/wrapper.py
Expand Up @@ -24,11 +24,14 @@
tmpdir_shards = Path(tmpdir) / "shards_{:06d}".format(random.randrange(10 ** 6))

shell(
"gatk --java-options '{java_opts}' ApplyBQSRSpark {extra} "
"--reference {snakemake.input.ref} --input {snakemake.input.bam} "
"--bqsr-recal-file {snakemake.input.recal_table} "
"--tmp-dir {tmpdir} --output-shard-tmp-dir {tmpdir_shards} "
"--output {snakemake.output.bam} "
"-- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra} "
"{log}"
"gatk --java-options '{java_opts}' ApplyBQSRSpark"
" --input {snakemake.input.bam}"
" --bqsr-recal-file {snakemake.input.recal_table}"
" --reference {snakemake.input.ref}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output-shard-tmp-dir {tmpdir_shards}"
" --output {snakemake.output.bam}"
" -- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra}"
" {log}"
)
4 changes: 2 additions & 2 deletions bio/gatk/applyvqsr/environment.yaml
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.2.0.0
- snakemake-wrapper-utils ==0.1.3
- gatk4 =4.2
- snakemake-wrapper-utils =0.3
3 changes: 2 additions & 1 deletion bio/gatk/applyvqsr/meta.yaml
Expand Up @@ -3,13 +3,14 @@ description: |
Run gatk ApplyVQSR.
authors:
- Brett Copeland
- Filipe G. Vieira
input:
- VCF file
- Recalibration file
- Tranches file
output:
- Variant Quality Score-Recalibrated VCF
notes: |
* The `java_opts` param allows for additional arguments to be passed to the java compiler, e.g. "-Xmx4G" for one, and "-Xmx4G -XX:ParallelGCThreads=10" for two options.
* The `java_opts` param allows for additional arguments to be passed to the JVM, e.g. "-XX:ParallelGCThreads=10" (but not `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* For more information, see https://gatk.broadinstitute.org/hc/en-us/articles/360037226332-ApplyVQSR
16 changes: 8 additions & 8 deletions bio/gatk/applyvqsr/test/Snakefile
@@ -1,17 +1,17 @@
rule apply_vqsr:
input:
vcf="test.vcf",
recal="snps.recal",
tranches="snps.tranches",
ref="ref.fasta"
recal="snps.recal",
tranches="snps.tranches",
ref="ref.fasta",
output:
vcf="test.snp_recal.vcf"
vcf="test.snp_recal.vcf",
log:
"logs/gatk/applyvqsr.log"
"logs/gatk/applyvqsr.log",
params:
mode="SNP", # set mode, must be either SNP, INDEL or BOTH
extra="" # optional
extra="", # optional
resources:
mem_mb=50
mem_mb=50,
wrapper:
"master/bio/gatk/applyvqsr"
"master/bio/gatk/applyvqsr"
25 changes: 15 additions & 10 deletions bio/gatk/applyvqsr/wrapper.py
Expand Up @@ -5,20 +5,25 @@


import os

import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts


extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
shell(
"gatk --java-options '{java_opts}' ApplyVQSR {extra} "
"-R {snakemake.input.ref} -V {snakemake.input.vcf} "
"--recal-file {snakemake.input.recal} "
"--tranches-file {snakemake.input.tranches} "
"-mode {snakemake.params.mode} "
"--output {snakemake.output.vcf} "
"{log}"
)

with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' ApplyVQSR"
" --variant {snakemake.input.vcf}"
" --recal-file {snakemake.input.recal}"
" --reference {snakemake.input.ref}"
" --tranches-file {snakemake.input.tranches}"
" --mode {snakemake.params.mode}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.vcf}"
" {log}"
)
4 changes: 2 additions & 2 deletions bio/gatk/baserecalibrator/environment.yaml
Expand Up @@ -3,6 +3,6 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.1.4.1
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils ==0.1.3
- snakemake-wrapper-utils =0.3
3 changes: 2 additions & 1 deletion bio/gatk/baserecalibrator/meta.yaml
Expand Up @@ -5,14 +5,15 @@ authors:
- Christopher Schröder
- Johannes Köster
- Jake VanCampen
- Filipe G. Vieira
input:
- bam file
- fasta reference
- vcf.gz of known variants
output:
- recalibration table for the bam
notes: |
* The `java_opts` param allows for additional arguments to be passed to the java compiler, e.g. "-Xmx4G" for one, and "-Xmx4G -XX:ParallelGCThreads=10" for two options.
* The `java_opts` param allows for additional arguments to be passed to the JVM, e.g. "-XX:ParallelGCThreads=10" (but not `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* For more information, see https://software.broadinstitute.org/gatk/documentation/article?id=11050
14 changes: 5 additions & 9 deletions bio/gatk/baserecalibrator/test/Snakefile
Expand Up @@ -3,19 +3,15 @@ rule gatk_baserecalibrator:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
known="dbsnp.vcf.gz" # optional known sites - single or a list
known="dbsnp.vcf.gz", # optional known sites - single or a list
output:
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
log:
"logs/gatk/baserecalibrator/{sample}.log"
"logs/gatk/baserecalibrator/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
java_opts="", # optional
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/gatk/baserecalibrator"
17 changes: 12 additions & 5 deletions bio/gatk/baserecalibrator/wrapper.py
Expand Up @@ -4,6 +4,7 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

Expand All @@ -17,8 +18,14 @@
known = [known]
known = list(map("--known-sites {}".format, known))

shell(
"gatk --java-options '{java_opts}' BaseRecalibrator {extra} "
"-R {snakemake.input.ref} -I {snakemake.input.bam} "
"-O {snakemake.output.recal_table} {known} {log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' BaseRecalibrator"
" --input {snakemake.input.bam}"
" --reference {snakemake.input.ref}"
" {known}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.recal_table}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/gatk/baserecalibratorspark/environment.yaml
Expand Up @@ -5,4 +5,4 @@ channels:
dependencies:
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils =0.1.3
- snakemake-wrapper-utils =0.3
1 change: 1 addition & 0 deletions bio/gatk/baserecalibratorspark/meta.yaml
Expand Up @@ -5,6 +5,7 @@ authors:
- Christopher Schröder
- Johannes Köster
- Jake VanCampen
- Filipe G. Vieira
input:
- bam file
- fasta reference
Expand Down
10 changes: 5 additions & 5 deletions bio/gatk/baserecalibratorspark/test/Snakefile
Expand Up @@ -3,19 +3,19 @@ rule gatk_baserecalibratorspark:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
known="dbsnp.vcf.gz" # optional known sites
known="dbsnp.vcf.gz", # optional known sites
output:
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
log:
"logs/gatk/baserecalibrator/{sample}.log"
"logs/gatk/baserecalibrator/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
java_opts="", # optional
#spark_runner="", # optional, local by default
#spark_master="", # optional
#spark_extra="", # optional
resources:
mem_mb=1024
mem_mb=1024,
threads: 8
wrapper:
"master/bio/gatk/baserecalibratorspark"
22 changes: 11 additions & 11 deletions bio/gatk/baserecalibratorspark/wrapper.py
Expand Up @@ -4,7 +4,6 @@
__license__ = "MIT"

import tempfile

from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

Expand All @@ -16,18 +15,19 @@
spark_extra = snakemake.params.get("spark_extra", "")
java_opts = get_java_opts(snakemake)

tmpdir = tempfile.gettempdir()

log = snakemake.log_fmt_shell(stdout=True, stderr=True)
known = snakemake.input.get("known", "")
if known:
known = "--known-sites {}".format(known)

shell(
"gatk --java-options '{java_opts}' BaseRecalibratorSpark {extra} "
"-R {snakemake.input.ref} -I {snakemake.input.bam} "
"--output {snakemake.output.recal_table} {known} "
"--tmp-dir {tmpdir} "
"-- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra} "
"{log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' BaseRecalibratorSpark"
" --input {snakemake.input.bam}"
" --reference {snakemake.input.ref}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.recal_table} {known}"
" -- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra}"
" {log}"
)
4 changes: 2 additions & 2 deletions bio/gatk/cleansam/environment.yaml
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.2.0.0
- snakemake-wrapper-utils ==0.1.3
- gatk4 =4.2
- snakemake-wrapper-utils =0.3
8 changes: 4 additions & 4 deletions bio/gatk/cleansam/test/Snakefile
@@ -1,13 +1,13 @@
rule gatk_clean_sam:
input:
bam="{sample}.bam"
bam="{sample}.bam",
output:
clean="{sample}.clean.bam"
clean="{sample}.clean.bam",
log:
"logs/{sample}.log"
"logs/{sample}.log",
params:
extra="",
java_opts="", # optional
java_opts="", # optional
resources:
mem_mb=1024,
wrapper:
Expand Down

0 comments on commit c17266b

Please sign in to comment.