diff --git a/bio/gatk/applybqsr/environment.yaml b/bio/gatk/applybqsr/environment.yaml index a249332a02..8a78d126ae 100644 --- a/bio/gatk/applybqsr/environment.yaml +++ b/bio/gatk/applybqsr/environment.yaml @@ -6,3 +6,4 @@ dependencies: - gatk4 =4.2 - openjdk =8 - snakemake-wrapper-utils =0.3 + - samtools =1.16 diff --git a/bio/gatk/applybqsr/test/Snakefile b/bio/gatk/applybqsr/test/Snakefile index 64323d2ee3..d4725b9fa3 100644 --- a/bio/gatk/applybqsr/test/Snakefile +++ b/bio/gatk/applybqsr/test/Snakefile @@ -11,6 +11,7 @@ rule gatk_applybqsr: params: extra="", # optional java_opts="", # optional + embed_ref=True, # embed the reference in cram output resources: mem_mb=1024, wrapper: diff --git a/bio/gatk/applybqsr/test/Snakefile_cram b/bio/gatk/applybqsr/test/Snakefile_cram new file mode 100644 index 0000000000..a786a49a84 --- /dev/null +++ b/bio/gatk/applybqsr/test/Snakefile_cram @@ -0,0 +1,18 @@ +rule gatk_applybqsr: + input: + bam="mapped/{sample}.bam", + ref="genome.fasta", + dict="genome.dict", + recal_table="recal/{sample}.grp", + output: + bam="recal/{sample}.cram", + log: + "logs/gatk/gatk_applybqsr/{sample}.log", + params: + extra="", # optional + java_opts="", # optional + embed_ref=True, # embed the reference in cram output + resources: + mem_mb=1024, + wrapper: + "master/bio/gatk/applybqsr" diff --git a/bio/gatk/applybqsr/wrapper.py b/bio/gatk/applybqsr/wrapper.py index 2a0bbc126e..422200e7e1 100644 --- a/bio/gatk/applybqsr/wrapper.py +++ b/bio/gatk/applybqsr/wrapper.py @@ -8,19 +8,28 @@ from snakemake.shell import shell from snakemake_wrapper_utils.java import get_java_opts +# Extract arguments extra = snakemake.params.get("extra", "") +reference = snakemake.input.get("ref") +embed_ref = snakemake.params.get("embed_ref", False) java_opts = get_java_opts(snakemake) log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True) +if snakemake.output.bam.endswith(".cram") and embed_ref: + output = "/dev/stdout" + pipe_cmd = " | samtools view -h -O cram,embed_ref -T {reference} -o {snakemake.output.bam} -" +else: + output = snakemake.output.bam + pipe_cmd = "" + with tempfile.TemporaryDirectory() as tmpdir: shell( - "gatk --java-options '{java_opts}' ApplyBQSR" + "(gatk --java-options '{java_opts}' ApplyBQSR" " --input {snakemake.input.bam}" " --bqsr-recal-file {snakemake.input.recal_table}" - " --reference {snakemake.input.ref}" + " --reference {reference}" " {extra}" " --tmp-dir {tmpdir}" - " --output {snakemake.output.bam}" - " {log}" + " --output {output}" + pipe_cmd + ") {log}" ) diff --git a/bio/gatk/applybqsrspark/environment.yaml b/bio/gatk/applybqsrspark/environment.yaml index a249332a02..5d1b71da9e 100644 --- a/bio/gatk/applybqsrspark/environment.yaml +++ b/bio/gatk/applybqsrspark/environment.yaml @@ -6,3 +6,4 @@ dependencies: - gatk4 =4.2 - openjdk =8 - snakemake-wrapper-utils =0.3 + - samtools =1.16 \ No newline at end of file diff --git a/bio/gatk/applybqsrspark/test/Snakefile b/bio/gatk/applybqsrspark/test/Snakefile index d12d4ceb20..68c4acad3b 100644 --- a/bio/gatk/applybqsrspark/test/Snakefile +++ b/bio/gatk/applybqsrspark/test/Snakefile @@ -14,6 +14,8 @@ rule gatk_applybqsr_spark: #spark_runner="", # optional, local by default #spark_master="", # optional #spark_extra="", # optional + embed_ref=True, # embed reference in cram output + exceed_thread_limit=True, # samtools is also parallized and thread limit is not guaranteed anymore resources: mem_mb=1024, wrapper: diff --git a/bio/gatk/applybqsrspark/test/Snakefile_cram b/bio/gatk/applybqsrspark/test/Snakefile_cram new file mode 100644 index 0000000000..14b0e207b1 --- /dev/null +++ b/bio/gatk/applybqsrspark/test/Snakefile_cram @@ -0,0 +1,22 @@ +rule gatk_applybqsr_spark: + input: + bam="mapped/{sample}.bam", + ref="genome.fasta", + dict="genome.dict", + recal_table="recal/{sample}.grp", + output: + bam="recal/{sample}.cram", + log: + "logs/gatk/gatk_applybqsr_spark/{sample}.log", + params: + extra="", # optional + java_opts="", # optional + #spark_runner="", # optional, local by default + #spark_master="", # optional + #spark_extra="", # optional + embed_ref=True, # embed reference in cram output + exceed_thread_limit=True, # samtools is also parallized + resources: + mem_mb=1024, + wrapper: + "master/bio/gatk/applybqsrspark" diff --git a/bio/gatk/applybqsrspark/wrapper.py b/bio/gatk/applybqsrspark/wrapper.py index c14d4aa2dc..2dfd7510d5 100644 --- a/bio/gatk/applybqsrspark/wrapper.py +++ b/bio/gatk/applybqsrspark/wrapper.py @@ -1,4 +1,4 @@ -__author__ = "Filipe G. Vieira" +__author__ = "Filipe G. Vieira, Christopher Schröder" __copyright__ = "Copyright 2021, Filipe G. Vieira" __license__ = "MIT" @@ -15,23 +15,40 @@ "spark_master", "local[{}]".format(snakemake.threads) ) spark_extra = snakemake.params.get("spark_extra", "") +reference = snakemake.input.get("ref") +embed_ref = snakemake.params.get("embed_ref", False) +exceed_thread_limit = snakemake.params.get("exceed_thread_limit", False) java_opts = get_java_opts(snakemake) log = snakemake.log_fmt_shell(stdout=True, stderr=True) +if exceed_thread_limit: + samtools_threads = snakemake.threads +else: + samtools_threads = 1 + +if snakemake.output.bam.endswith(".cram") and embed_ref: + output = "/dev/stdout --create-output-bam-splitting-index false" + pipe_cmd = " | samtools view -h -O cram,embed_ref -T {reference} -o {snakemake.output.bam} -@ {samtools_threads} -" +else: + output = snakemake.output.bam + pipe_cmd = "" + + with tempfile.TemporaryDirectory() as tmpdir: # This folder must not exist; it is created by GATK tmpdir_shards = Path(tmpdir) / "shards_{:06d}".format(random.randrange(10**6)) shell( - "gatk --java-options '{java_opts}' ApplyBQSRSpark" + "(gatk --java-options '{java_opts}' ApplyBQSRSpark" " --input {snakemake.input.bam}" " --bqsr-recal-file {snakemake.input.recal_table}" " --reference {snakemake.input.ref}" " {extra}" " --tmp-dir {tmpdir}" " --output-shard-tmp-dir {tmpdir_shards}" - " --output {snakemake.output.bam}" + " --output {output}" " -- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra}" - " {log}" + + pipe_cmd + + ") {log}" ) diff --git a/test.py b/test.py index d3dd64f576..5148b7e5d0 100644 --- a/test.py +++ b/test.py @@ -3617,6 +3617,13 @@ def test_gatk_applybqsr(): ["snakemake", "--cores", "1", "recal/a.bam", "--use-conda", "-F"], ) +@skip_if_not_modified +def test_gatk_applybqsr_cram(): + run( + "bio/gatk/applybqsr", + ["snakemake", "-s", "Snakefile_cram", "--cores", "1", "recal/a.cram", "--use-conda", "-F"], + ) + @skip_if_not_modified def test_gatk_applybqsrspark(): @@ -3625,6 +3632,12 @@ def test_gatk_applybqsrspark(): ["snakemake", "--cores", "1", "recal/a.bam", "--use-conda", "-F"], ) +@skip_if_not_modified +def test_gatk_applybqsrspark_cram(): + run( + "bio/gatk/applybqsrspark", + ["snakemake", "-s", "Snakefile_cram", "--cores", "1", "recal/a.cram", "--use-conda", "-F"], + ) @skip_if_not_modified def test_gatk_haplotypecaller_vcf():