feat!: added tempdir, updated version and metadata to all picard wrap…

…pers (#443) * Added tempdir, updated version and metadata * Updated test rules with new option formats * Several typos fixed * Changed file names so they do not get parsed by picard * Fixed typo * Add extra param on all wrappers
snakemake · Jan 26, 2022 · 55e672a · 55e672a
1 parent f422fa7
commit 55e672a
Show file tree

Hide file tree

Showing 70 changed files with 432 additions and 296 deletions.
diff --git a/bio/picard/addorreplacereadgroups/environment.yaml b/bio/picard/addorreplacereadgroups/environment.yaml
@@ -3,5 +3,5 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - picard ==2.22.1
+  - picard =2.26
   - snakemake-wrapper-utils ==0.1.3
diff --git a/bio/picard/addorreplacereadgroups/meta.yaml b/bio/picard/addorreplacereadgroups/meta.yaml
@@ -6,3 +6,8 @@ input:
   - bam file
 output:
   - bam file with added or replaced read groups
+notes: |
+  * The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
+  * The `extra` param allows for additional program arguments.
+  * `--TMP_DIR` is automatically set by `resources.tmpdir`
+  * For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#AddOrReplaceReadGroups
diff --git a/bio/picard/addorreplacereadgroups/test/Snakefile b/bio/picard/addorreplacereadgroups/test/Snakefile
@@ -1,17 +1,17 @@
 rule replace_rg:
     input:
-        "mapped/{sample}.bam"
+        "mapped/{sample}.bam",
     output:
-        "fixed-rg/{sample}.bam"
+        "fixed-rg/{sample}.bam",
     log:
-        "logs/picard/replace_rg/{sample}.log"
+        "logs/picard/replace_rg/{sample}.log",
     params:
-        "RGLB=lib1 RGPL=illumina RGPU={sample} RGSM={sample}"
+        extra="--RGLB lib1 --RGPL illumina --RGPU {sample} --RGSM {sample}",
     # optional specification of memory usage of the JVM that snakemake will respect with global
     # resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
     # and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
     # https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
     resources:
-        mem_mb=1024
+        mem_mb=1024,
     wrapper:
         "master/bio/picard/addorreplacereadgroups"
diff --git a/bio/picard/addorreplacereadgroups/wrapper.py b/bio/picard/addorreplacereadgroups/wrapper.py
@@ -4,13 +4,21 @@
 __license__ = "MIT"
 
 
+import tempfile
 from snakemake.shell import shell
 from snakemake_wrapper_utils.java import get_java_opts
 
-extra = snakemake.params
+log = snakemake.log_fmt_shell()
+
+extra = snakemake.params.get("extra", "")
 java_opts = get_java_opts(snakemake)
 
-shell(
-    "picard AddOrReplaceReadGroups {java_opts} {extra} "
-    "I={snakemake.input} O={snakemake.output} &> {snakemake.log}"
-)
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "picard AddOrReplaceReadGroups"
+        " {java_opts} {extra}"
+        " --INPUT {snakemake.input}"
+        " --TMP_DIR {tmpdir}"
+        " --OUTPUT {snakemake.output}"
+        " {log}"
+    )
diff --git a/bio/picard/bedtointervallist/environment.yaml b/bio/picard/bedtointervallist/environment.yaml
@@ -3,5 +3,5 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - picard ==2.22.1
+  - picard =2.26
   - snakemake-wrapper-utils ==0.1.3
diff --git a/bio/picard/bedtointervallist/meta.yaml b/bio/picard/bedtointervallist/meta.yaml
@@ -8,4 +8,8 @@ input:
   - dict: genome dictionary file (from samtools dict or `picard CreateSequenceDictionary <https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/picard/createsequencedictionary.html>`_ )
 output:
   - interval_list Picard format
-
+notes: |
+  * The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
+  * The `extra` param allows for additional program arguments.
+  * `--TMP_DIR` is automatically set by `resources.tmpdir`
+  * For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#BedToIntervalList
diff --git a/bio/picard/bedtointervallist/test/Snakefile b/bio/picard/bedtointervallist/test/Snakefile
@@ -1,19 +1,19 @@
 rule bed_to_interval_list:
     input:
         bed="resources/a.bed",
-        dict="resources/genome.dict"
+        dict="resources/genome.dict",
     output:
-        "a.interval_list"
+        "a.interval_list",
     log:
-        "logs/picard/bedtointervallist/a.log"
+        "logs/picard/bedtointervallist/a.log",
     params:
         # optional parameters
-        "SORT=true " # sort output interval list before writing
+        extra="--SORT true",  # sort output interval list before writing
     # optional specification of memory usage of the JVM that snakemake will respect with global
     # resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
     # and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
     # https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
     resources:
-        mem_mb=1024
+        mem_mb=1024,
     wrapper:
         "master/bio/picard/bedtointervallist"
diff --git a/bio/picard/bedtointervallist/wrapper.py b/bio/picard/bedtointervallist/wrapper.py
@@ -3,20 +3,22 @@
 __email__ = "fkilpert@gmail.com"
 __license__ = "MIT"
 
-
+import tempfile
 from snakemake.shell import shell
 from snakemake_wrapper_utils.java import get_java_opts
 
 log = snakemake.log_fmt_shell()
 
-extra = snakemake.params
+extra = snakemake.params.get("extra", "")
 java_opts = get_java_opts(snakemake)
 
-shell(
-    "picard BedToIntervalList "
-    "{java_opts} {extra} "
-    "INPUT={snakemake.input.bed} "
-    "SEQUENCE_DICTIONARY={snakemake.input.dict} "
-    "OUTPUT={snakemake.output} "
-    "{log} "
-)
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "picard BedToIntervalList"
+        " {java_opts} {extra}"
+        " --INPUT {snakemake.input.bed}"
+        " --SEQUENCE_DICTIONARY {snakemake.input.dict}"
+        " --TMP_DIR {tmpdir}"
+        " --OUTPUT {snakemake.output}"
+        " {log}"
+    )
diff --git a/bio/picard/collectalignmentsummarymetrics/environment.yaml b/bio/picard/collectalignmentsummarymetrics/environment.yaml
@@ -3,5 +3,5 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - picard ==2.22.1
+  - picard =2.26
   - snakemake-wrapper-utils ==0.1.3
diff --git a/bio/picard/collectalignmentsummarymetrics/meta.yaml b/bio/picard/collectalignmentsummarymetrics/meta.yaml
@@ -3,3 +3,8 @@ description: |
   Collect metrics on aligned reads with picard tools.
 authors:
   - Johannes Köster
+notes: |
+  * The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
+  * The `extra` param allows for additional program arguments.
+  * `--TMP_DIR` is automatically set by `resources.tmpdir`
+  * For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectAlignmentSummaryMetrics
diff --git a/bio/picard/collectalignmentsummarymetrics/test/Snakefile b/bio/picard/collectalignmentsummarymetrics/test/Snakefile
@@ -1,21 +1,19 @@
 rule alignment_summary:
     input:
         ref="genome.fasta",
-        bam="mapped/{sample}.bam"
+        bam="mapped/{sample}.bam",
     output:
-        "stats/{sample}.summary.txt"
+        "stats/{sample}.summary.txt",
     log:
-        "logs/picard/alignment-summary/{sample}.log"
+        "logs/picard/alignment-summary/{sample}.log",
     params:
         # optional parameters (e.g. relax checks as below)
-        "VALIDATION_STRINGENCY=LENIENT "
-        "METRIC_ACCUMULATION_LEVEL=null "
-        "METRIC_ACCUMULATION_LEVEL=SAMPLE"
+        extra="--VALIDATION_STRINGENCY LENIENT --METRIC_ACCUMULATION_LEVEL null --METRIC_ACCUMULATION_LEVEL SAMPLE",
     # optional specification of memory usage of the JVM that snakemake will respect with global
     # resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
     # and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
     # https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
     resources:
-        mem_mb=1024
+        mem_mb=1024,
     wrapper:
         "master/bio/picard/collectalignmentsummarymetrics"
diff --git a/bio/picard/collectalignmentsummarymetrics/wrapper.py b/bio/picard/collectalignmentsummarymetrics/wrapper.py
@@ -4,16 +4,22 @@
 __license__ = "MIT"
 
 
+import tempfile
 from snakemake.shell import shell
 from snakemake_wrapper_utils.java import get_java_opts
 
 log = snakemake.log_fmt_shell()
 
-extra = snakemake.params
+extra = snakemake.params.get("extra", "")
 java_opts = get_java_opts(snakemake)
 
-shell(
-    "picard CollectAlignmentSummaryMetrics {java_opts} {extra} "
-    "INPUT={snakemake.input.bam} OUTPUT={snakemake.output[0]} "
-    "REFERENCE_SEQUENCE={snakemake.input.ref} {log}"
-)
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "picard CollectAlignmentSummaryMetrics"
+        " {java_opts} {extra}"
+        " --INPUT {snakemake.input.bam}"
+        " --REFERENCE_SEQUENCE {snakemake.input.ref}"
+        " --TMP_DIR {tmpdir}"
+        " --OUTPUT {snakemake.output[0]}"
+        " {log}"
+    )
diff --git a/bio/picard/collectgcbiasmetrics/environment.yaml b/bio/picard/collectgcbiasmetrics/environment.yaml
@@ -3,5 +3,5 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - picard ==2.25.4
+  - picard =2.26
   - snakemake-wrapper-utils ==0.1.3
diff --git a/bio/picard/collectgcbiasmetrics/meta.yaml b/bio/picard/collectgcbiasmetrics/meta.yaml
@@ -11,4 +11,7 @@ output:
   - GC metrics PDF figure
   - GC summary metrics text file
 notes: |
-        * For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectGcBiasMetrics.
+  * The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
+  * The `extra` param allows for additional program arguments.
+  * `--TMP_DIR` is automatically set by `resources.tmpdir`
+  * For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectGcBiasMetrics
diff --git a/bio/picard/collectgcbiasmetrics/test/Snakefile b/bio/picard/collectgcbiasmetrics/test/Snakefile
@@ -3,21 +3,21 @@ rule alignment_summary:
         # BAM aligned to reference genome
         bam="mapped/a.bam",
         # reference genome FASTA from which GC-context is inferred
-        ref="genome.fasta"
+        ref="genome.fasta",
     output:
         metrics="results/a.gcmetrics.txt",
         chart="results/a.gc.pdf",
-        summary="results/a.summary.txt"
+        summary="results/a.summary.txt",
     params:
         # optional additional parameters, for example,
-        extra="MINIMUM_GENOME_FRACTION=1E-5"
+        extra="--MINIMUM_GENOME_FRACTION 1E-5",
     log:
-        "logs/picard/a.gcmetrics.log"
+        "logs/picard/a.gcmetrics.log",
     # optional specification of memory usage of the JVM that snakemake will respect with global
     # resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
     # and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
     # https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
     resources:
-        mem_mb=1024
+        mem_mb=1024,
     wrapper:
         "master/bio/picard/collectgcbiasmetrics"
diff --git a/bio/picard/collectgcbiasmetrics/wrapper.py b/bio/picard/collectgcbiasmetrics/wrapper.py
@@ -4,20 +4,24 @@
 __license__ = "MIT"
 
 
+import tempfile
 from snakemake.shell import shell
 from snakemake_wrapper_utils.java import get_java_opts
 
+log = snakemake.log_fmt_shell()
+
 extra = snakemake.params.get("extra", "")
 java_opts = get_java_opts(snakemake)
-log = snakemake.log_fmt_shell(stdout=True, stderr=True)
 
-shell(
-    "picard CollectGcBiasMetrics "
-    "{java_opts} {extra} "
-    "INPUT={snakemake.input.bam} "
-    "OUTPUT={snakemake.output.metrics} "
-    "CHART={snakemake.output.chart} "
-    "SUMMARY_OUTPUT={snakemake.output.summary} "
-    "REFERENCE_SEQUENCE={snakemake.input.ref} "
-    "{log}"
-)
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "picard CollectGcBiasMetrics"
+        " {java_opts} {extra}"
+        " --INPUT {snakemake.input.bam}"
+        " --TMP_DIR {tmpdir}"
+        " --OUTPUT {snakemake.output.metrics}"
+        " --CHART {snakemake.output.chart}"
+        " --SUMMARY_OUTPUT {snakemake.output.summary}"
+        " --REFERENCE_SEQUENCE {snakemake.input.ref}"
+        " {log}"
+    )
diff --git a/bio/picard/collecthsmetrics/environment.yaml b/bio/picard/collecthsmetrics/environment.yaml
@@ -2,5 +2,5 @@ channels:
   - bioconda
   - conda-forge
 dependencies:
-  - picard ==2.22.1
+  - picard =2.26
   - snakemake-wrapper-utils ==0.1.3
diff --git a/bio/picard/collecthsmetrics/meta.yaml b/bio/picard/collecthsmetrics/meta.yaml
@@ -7,3 +7,8 @@ input:
   - bam file
 output:
   - metrics file
+notes: |
+  * The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
+  * The `extra` param allows for additional program arguments.
+  * `--TMP_DIR` is automatically set by `resources.tmpdir`
+  * For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectHSMetrics
diff --git a/bio/picard/collecthsmetrics/test/Snakefile b/bio/picard/collecthsmetrics/test/Snakefile
@@ -5,20 +5,20 @@ rule picard_collect_hs_metrics:
         # Baits and targets should be given as interval lists. These can
         # be generated from bed files using picard BedToIntervalList.
         bait_intervals="regions.intervals",
-        target_intervals="regions.intervals"
+        target_intervals="regions.intervals",
     output:
-        "stats/hs_metrics/{sample}.txt"
+        "stats/hs_metrics/{sample}.txt",
     params:
         # Optional extra arguments. Here we reduce sample size
         # to reduce the runtime in our unit test.
-        extra="SAMPLE_SIZE=1000"
+        extra="--SAMPLE_SIZE 1000",
     # optional specification of memory usage of the JVM that snakemake will respect with global
     # resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
     # and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
     # https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
     resources:
-        mem_mb=1024
+        mem_mb=1024,
     log:
-        "logs/picard_collect_hs_metrics/{sample}.log"
+        "logs/picard_collect_hs_metrics/{sample}.log",
     wrapper:
         "master/bio/picard/collecthsmetrics"
diff --git a/bio/picard/collecthsmetrics/wrapper.py b/bio/picard/collecthsmetrics/wrapper.py
@@ -6,22 +6,24 @@
 __license__ = "MIT"
 
 
+import tempfile
 from snakemake.shell import shell
 from snakemake_wrapper_utils.java import get_java_opts
 
+log = snakemake.log_fmt_shell(stdout=False, stderr=True)
 
-inputs = " ".join("INPUT={}".format(in_) for in_ in snakemake.input)
 extra = snakemake.params.get("extra", "")
-log = snakemake.log_fmt_shell(stdout=False, stderr=True)
 java_opts = get_java_opts(snakemake)
 
-shell(
-    "picard CollectHsMetrics"
-    " {java_opts} {extra}"
-    " INPUT={snakemake.input.bam}"
-    " OUTPUT={snakemake.output[0]}"
-    " REFERENCE_SEQUENCE={snakemake.input.reference}"
-    " BAIT_INTERVALS={snakemake.input.bait_intervals}"
-    " TARGET_INTERVALS={snakemake.input.target_intervals}"
-    " {log}"
-)
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "picard CollectHsMetrics"
+        " {java_opts} {extra}"
+        " --INPUT {snakemake.input.bam}"
+        " --TMP_DIR {tmpdir}"
+        " --OUTPUT {snakemake.output[0]}"
+        " --REFERENCE_SEQUENCE {snakemake.input.reference}"
+        " --BAIT_INTERVALS {snakemake.input.bait_intervals}"
+        " --TARGET_INTERVALS {snakemake.input.target_intervals}"
+        " {log}"
+    )
diff --git a/bio/picard/collectinsertsizemetrics/environment.yaml b/bio/picard/collectinsertsizemetrics/environment.yaml
@@ -3,6 +3,6 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - picard ==2.22.1
+  - picard =2.26
   - r-base ==3.6.2
   - snakemake-wrapper-utils ==0.1.3
diff --git a/bio/picard/collectinsertsizemetrics/meta.yaml b/bio/picard/collectinsertsizemetrics/meta.yaml
@@ -8,3 +8,8 @@ input:
 output:
   - txt: textual representation of metrics
   - pdf: insert size histogram
+notes: |
+  * The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
+  * The `extra` param allows for additional program arguments.
+  * `--TMP_DIR` is automatically set by `resources.tmpdir`
+  * For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectInsertSizeMetrics