Skip to content

Commit

Permalink
feat: add cram support to applybsqr (#563)
Browse files Browse the repository at this point in the history
* add cram support to applybsqr

* reference is 'ref' in input, cram support in spark

* fix test config

* cram output in tests, not input

* added pipe symbol

* wrapper path wrong

* more parameters in snakefile for spark
  • Loading branch information
christopher-schroeder committed Oct 11, 2022
1 parent 76174cf commit 41bb50b
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 8 deletions.
1 change: 1 addition & 0 deletions bio/gatk/applybqsr/environment.yaml
Expand Up @@ -6,3 +6,4 @@ dependencies:
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils =0.3
- samtools =1.16
1 change: 1 addition & 0 deletions bio/gatk/applybqsr/test/Snakefile
Expand Up @@ -11,6 +11,7 @@ rule gatk_applybqsr:
params:
extra="", # optional
java_opts="", # optional
embed_ref=True, # embed the reference in cram output
resources:
mem_mb=1024,
wrapper:
Expand Down
18 changes: 18 additions & 0 deletions bio/gatk/applybqsr/test/Snakefile_cram
@@ -0,0 +1,18 @@
rule gatk_applybqsr:
input:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
recal_table="recal/{sample}.grp",
output:
bam="recal/{sample}.cram",
log:
"logs/gatk/gatk_applybqsr/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
embed_ref=True, # embed the reference in cram output
resources:
mem_mb=1024,
wrapper:
"master/bio/gatk/applybqsr"
17 changes: 13 additions & 4 deletions bio/gatk/applybqsr/wrapper.py
Expand Up @@ -8,19 +8,28 @@
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

# Extract arguments
extra = snakemake.params.get("extra", "")
reference = snakemake.input.get("ref")
embed_ref = snakemake.params.get("embed_ref", False)
java_opts = get_java_opts(snakemake)

log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True)

if snakemake.output.bam.endswith(".cram") and embed_ref:
output = "/dev/stdout"
pipe_cmd = " | samtools view -h -O cram,embed_ref -T {reference} -o {snakemake.output.bam} -"
else:
output = snakemake.output.bam
pipe_cmd = ""

with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' ApplyBQSR"
"(gatk --java-options '{java_opts}' ApplyBQSR"
" --input {snakemake.input.bam}"
" --bqsr-recal-file {snakemake.input.recal_table}"
" --reference {snakemake.input.ref}"
" --reference {reference}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.bam}"
" {log}"
" --output {output}" + pipe_cmd + ") {log}"
)
1 change: 1 addition & 0 deletions bio/gatk/applybqsrspark/environment.yaml
Expand Up @@ -6,3 +6,4 @@ dependencies:
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils =0.3
- samtools =1.16
2 changes: 2 additions & 0 deletions bio/gatk/applybqsrspark/test/Snakefile
Expand Up @@ -14,6 +14,8 @@ rule gatk_applybqsr_spark:
#spark_runner="", # optional, local by default
#spark_master="", # optional
#spark_extra="", # optional
embed_ref=True, # embed reference in cram output
exceed_thread_limit=True, # samtools is also parallized and thread limit is not guaranteed anymore
resources:
mem_mb=1024,
wrapper:
Expand Down
22 changes: 22 additions & 0 deletions bio/gatk/applybqsrspark/test/Snakefile_cram
@@ -0,0 +1,22 @@
rule gatk_applybqsr_spark:
input:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
recal_table="recal/{sample}.grp",
output:
bam="recal/{sample}.cram",
log:
"logs/gatk/gatk_applybqsr_spark/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
#spark_runner="", # optional, local by default
#spark_master="", # optional
#spark_extra="", # optional
embed_ref=True, # embed reference in cram output
exceed_thread_limit=True, # samtools is also parallized
resources:
mem_mb=1024,
wrapper:
"master/bio/gatk/applybqsrspark"
25 changes: 21 additions & 4 deletions bio/gatk/applybqsrspark/wrapper.py
@@ -1,4 +1,4 @@
__author__ = "Filipe G. Vieira"
__author__ = "Filipe G. Vieira, Christopher Schröder"
__copyright__ = "Copyright 2021, Filipe G. Vieira"
__license__ = "MIT"

Expand All @@ -15,23 +15,40 @@
"spark_master", "local[{}]".format(snakemake.threads)
)
spark_extra = snakemake.params.get("spark_extra", "")
reference = snakemake.input.get("ref")
embed_ref = snakemake.params.get("embed_ref", False)
exceed_thread_limit = snakemake.params.get("exceed_thread_limit", False)
java_opts = get_java_opts(snakemake)

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

if exceed_thread_limit:
samtools_threads = snakemake.threads
else:
samtools_threads = 1

if snakemake.output.bam.endswith(".cram") and embed_ref:
output = "/dev/stdout --create-output-bam-splitting-index false"
pipe_cmd = " | samtools view -h -O cram,embed_ref -T {reference} -o {snakemake.output.bam} -@ {samtools_threads} -"
else:
output = snakemake.output.bam
pipe_cmd = ""


with tempfile.TemporaryDirectory() as tmpdir:
# This folder must not exist; it is created by GATK
tmpdir_shards = Path(tmpdir) / "shards_{:06d}".format(random.randrange(10**6))

shell(
"gatk --java-options '{java_opts}' ApplyBQSRSpark"
"(gatk --java-options '{java_opts}' ApplyBQSRSpark"
" --input {snakemake.input.bam}"
" --bqsr-recal-file {snakemake.input.recal_table}"
" --reference {snakemake.input.ref}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output-shard-tmp-dir {tmpdir_shards}"
" --output {snakemake.output.bam}"
" --output {output}"
" -- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra}"
" {log}"
+ pipe_cmd
+ ") {log}"
)
13 changes: 13 additions & 0 deletions test.py
Expand Up @@ -3617,6 +3617,13 @@ def test_gatk_applybqsr():
["snakemake", "--cores", "1", "recal/a.bam", "--use-conda", "-F"],
)

@skip_if_not_modified
def test_gatk_applybqsr_cram():
run(
"bio/gatk/applybqsr",
["snakemake", "-s", "Snakefile_cram", "--cores", "1", "recal/a.cram", "--use-conda", "-F"],
)


@skip_if_not_modified
def test_gatk_applybqsrspark():
Expand All @@ -3625,6 +3632,12 @@ def test_gatk_applybqsrspark():
["snakemake", "--cores", "1", "recal/a.bam", "--use-conda", "-F"],
)

@skip_if_not_modified
def test_gatk_applybqsrspark_cram():
run(
"bio/gatk/applybqsrspark",
["snakemake", "-s", "Snakefile_cram", "--cores", "1", "recal/a.cram", "--use-conda", "-F"],
)

@skip_if_not_modified
def test_gatk_haplotypecaller_vcf():
Expand Down

0 comments on commit 41bb50b

Please sign in to comment.