Skip to content

Commit

Permalink
feat!: added tempdir, updated version and metadata to all picard wrap…
Browse files Browse the repository at this point in the history
…pers (#443)

* Added tempdir, updated version and metadata

* Updated test rules with new option formats

* Several typos fixed

* Changed file names so they do not get parsed by picard

* Fixed typo

* Add extra param on all wrappers
  • Loading branch information
fgvieira committed Jan 26, 2022
1 parent f422fa7 commit 55e672a
Show file tree
Hide file tree
Showing 70 changed files with 432 additions and 296 deletions.
2 changes: 1 addition & 1 deletion bio/picard/addorreplacereadgroups/environment.yaml
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- picard ==2.22.1
- picard =2.26
- snakemake-wrapper-utils ==0.1.3
5 changes: 5 additions & 0 deletions bio/picard/addorreplacereadgroups/meta.yaml
Expand Up @@ -6,3 +6,8 @@ input:
- bam file
output:
- bam file with added or replaced read groups
notes: |
* The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* `--TMP_DIR` is automatically set by `resources.tmpdir`
* For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#AddOrReplaceReadGroups
10 changes: 5 additions & 5 deletions bio/picard/addorreplacereadgroups/test/Snakefile
@@ -1,17 +1,17 @@
rule replace_rg:
input:
"mapped/{sample}.bam"
"mapped/{sample}.bam",
output:
"fixed-rg/{sample}.bam"
"fixed-rg/{sample}.bam",
log:
"logs/picard/replace_rg/{sample}.log"
"logs/picard/replace_rg/{sample}.log",
params:
"RGLB=lib1 RGPL=illumina RGPU={sample} RGSM={sample}"
extra="--RGLB lib1 --RGPL illumina --RGPU {sample} --RGSM {sample}",
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/picard/addorreplacereadgroups"
18 changes: 13 additions & 5 deletions bio/picard/addorreplacereadgroups/wrapper.py
Expand Up @@ -4,13 +4,21 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

extra = snakemake.params
log = snakemake.log_fmt_shell()

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)

shell(
"picard AddOrReplaceReadGroups {java_opts} {extra} "
"I={snakemake.input} O={snakemake.output} &> {snakemake.log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"picard AddOrReplaceReadGroups"
" {java_opts} {extra}"
" --INPUT {snakemake.input}"
" --TMP_DIR {tmpdir}"
" --OUTPUT {snakemake.output}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/picard/bedtointervallist/environment.yaml
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- picard ==2.22.1
- picard =2.26
- snakemake-wrapper-utils ==0.1.3
6 changes: 5 additions & 1 deletion bio/picard/bedtointervallist/meta.yaml
Expand Up @@ -8,4 +8,8 @@ input:
- dict: genome dictionary file (from samtools dict or `picard CreateSequenceDictionary <https://snakemake-wrappers.readthedocs.io/en/stable/wrappers/picard/createsequencedictionary.html>`_ )
output:
- interval_list Picard format

notes: |
* The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* `--TMP_DIR` is automatically set by `resources.tmpdir`
* For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#BedToIntervalList
10 changes: 5 additions & 5 deletions bio/picard/bedtointervallist/test/Snakefile
@@ -1,19 +1,19 @@
rule bed_to_interval_list:
input:
bed="resources/a.bed",
dict="resources/genome.dict"
dict="resources/genome.dict",
output:
"a.interval_list"
"a.interval_list",
log:
"logs/picard/bedtointervallist/a.log"
"logs/picard/bedtointervallist/a.log",
params:
# optional parameters
"SORT=true " # sort output interval list before writing
extra="--SORT true", # sort output interval list before writing
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/picard/bedtointervallist"
22 changes: 12 additions & 10 deletions bio/picard/bedtointervallist/wrapper.py
Expand Up @@ -3,20 +3,22 @@
__email__ = "fkilpert@gmail.com"
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

log = snakemake.log_fmt_shell()

extra = snakemake.params
extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)

shell(
"picard BedToIntervalList "
"{java_opts} {extra} "
"INPUT={snakemake.input.bed} "
"SEQUENCE_DICTIONARY={snakemake.input.dict} "
"OUTPUT={snakemake.output} "
"{log} "
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"picard BedToIntervalList"
" {java_opts} {extra}"
" --INPUT {snakemake.input.bed}"
" --SEQUENCE_DICTIONARY {snakemake.input.dict}"
" --TMP_DIR {tmpdir}"
" --OUTPUT {snakemake.output}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/picard/collectalignmentsummarymetrics/environment.yaml
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- picard ==2.22.1
- picard =2.26
- snakemake-wrapper-utils ==0.1.3
5 changes: 5 additions & 0 deletions bio/picard/collectalignmentsummarymetrics/meta.yaml
Expand Up @@ -3,3 +3,8 @@ description: |
Collect metrics on aligned reads with picard tools.
authors:
- Johannes Köster
notes: |
* The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* `--TMP_DIR` is automatically set by `resources.tmpdir`
* For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectAlignmentSummaryMetrics
12 changes: 5 additions & 7 deletions bio/picard/collectalignmentsummarymetrics/test/Snakefile
@@ -1,21 +1,19 @@
rule alignment_summary:
input:
ref="genome.fasta",
bam="mapped/{sample}.bam"
bam="mapped/{sample}.bam",
output:
"stats/{sample}.summary.txt"
"stats/{sample}.summary.txt",
log:
"logs/picard/alignment-summary/{sample}.log"
"logs/picard/alignment-summary/{sample}.log",
params:
# optional parameters (e.g. relax checks as below)
"VALIDATION_STRINGENCY=LENIENT "
"METRIC_ACCUMULATION_LEVEL=null "
"METRIC_ACCUMULATION_LEVEL=SAMPLE"
extra="--VALIDATION_STRINGENCY LENIENT --METRIC_ACCUMULATION_LEVEL null --METRIC_ACCUMULATION_LEVEL SAMPLE",
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/picard/collectalignmentsummarymetrics"
18 changes: 12 additions & 6 deletions bio/picard/collectalignmentsummarymetrics/wrapper.py
Expand Up @@ -4,16 +4,22 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

log = snakemake.log_fmt_shell()

extra = snakemake.params
extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)

shell(
"picard CollectAlignmentSummaryMetrics {java_opts} {extra} "
"INPUT={snakemake.input.bam} OUTPUT={snakemake.output[0]} "
"REFERENCE_SEQUENCE={snakemake.input.ref} {log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"picard CollectAlignmentSummaryMetrics"
" {java_opts} {extra}"
" --INPUT {snakemake.input.bam}"
" --REFERENCE_SEQUENCE {snakemake.input.ref}"
" --TMP_DIR {tmpdir}"
" --OUTPUT {snakemake.output[0]}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/picard/collectgcbiasmetrics/environment.yaml
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- picard ==2.25.4
- picard =2.26
- snakemake-wrapper-utils ==0.1.3
5 changes: 4 additions & 1 deletion bio/picard/collectgcbiasmetrics/meta.yaml
Expand Up @@ -11,4 +11,7 @@ output:
- GC metrics PDF figure
- GC summary metrics text file
notes: |
* For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectGcBiasMetrics.
* The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* `--TMP_DIR` is automatically set by `resources.tmpdir`
* For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectGcBiasMetrics
10 changes: 5 additions & 5 deletions bio/picard/collectgcbiasmetrics/test/Snakefile
Expand Up @@ -3,21 +3,21 @@ rule alignment_summary:
# BAM aligned to reference genome
bam="mapped/a.bam",
# reference genome FASTA from which GC-context is inferred
ref="genome.fasta"
ref="genome.fasta",
output:
metrics="results/a.gcmetrics.txt",
chart="results/a.gc.pdf",
summary="results/a.summary.txt"
summary="results/a.summary.txt",
params:
# optional additional parameters, for example,
extra="MINIMUM_GENOME_FRACTION=1E-5"
extra="--MINIMUM_GENOME_FRACTION 1E-5",
log:
"logs/picard/a.gcmetrics.log"
"logs/picard/a.gcmetrics.log",
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/picard/collectgcbiasmetrics"
26 changes: 15 additions & 11 deletions bio/picard/collectgcbiasmetrics/wrapper.py
Expand Up @@ -4,20 +4,24 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

log = snakemake.log_fmt_shell()

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

shell(
"picard CollectGcBiasMetrics "
"{java_opts} {extra} "
"INPUT={snakemake.input.bam} "
"OUTPUT={snakemake.output.metrics} "
"CHART={snakemake.output.chart} "
"SUMMARY_OUTPUT={snakemake.output.summary} "
"REFERENCE_SEQUENCE={snakemake.input.ref} "
"{log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"picard CollectGcBiasMetrics"
" {java_opts} {extra}"
" --INPUT {snakemake.input.bam}"
" --TMP_DIR {tmpdir}"
" --OUTPUT {snakemake.output.metrics}"
" --CHART {snakemake.output.chart}"
" --SUMMARY_OUTPUT {snakemake.output.summary}"
" --REFERENCE_SEQUENCE {snakemake.input.ref}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/picard/collecthsmetrics/environment.yaml
Expand Up @@ -2,5 +2,5 @@ channels:
- bioconda
- conda-forge
dependencies:
- picard ==2.22.1
- picard =2.26
- snakemake-wrapper-utils ==0.1.3
5 changes: 5 additions & 0 deletions bio/picard/collecthsmetrics/meta.yaml
Expand Up @@ -7,3 +7,8 @@ input:
- bam file
output:
- metrics file
notes: |
* The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* `--TMP_DIR` is automatically set by `resources.tmpdir`
* For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectHsMetrics
10 changes: 5 additions & 5 deletions bio/picard/collecthsmetrics/test/Snakefile
Expand Up @@ -5,20 +5,20 @@ rule picard_collect_hs_metrics:
# Baits and targets should be given as interval lists. These can
# be generated from bed files using picard BedToIntervalList.
bait_intervals="regions.intervals",
target_intervals="regions.intervals"
target_intervals="regions.intervals",
output:
"stats/hs_metrics/{sample}.txt"
"stats/hs_metrics/{sample}.txt",
params:
# Optional extra arguments. Here we reduce sample size
# to reduce the runtime in our unit test.
extra="SAMPLE_SIZE=1000"
extra="--SAMPLE_SIZE 1000",
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024
mem_mb=1024,
log:
"logs/picard_collect_hs_metrics/{sample}.log"
"logs/picard_collect_hs_metrics/{sample}.log",
wrapper:
"master/bio/picard/collecthsmetrics"
26 changes: 14 additions & 12 deletions bio/picard/collecthsmetrics/wrapper.py
Expand Up @@ -6,22 +6,24 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

inputs = " ".join("INPUT={}".format(in_) for in_ in snakemake.input)
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=False, stderr=True)
java_opts = get_java_opts(snakemake)

shell(
"picard CollectHsMetrics"
" {java_opts} {extra}"
" INPUT={snakemake.input.bam}"
" OUTPUT={snakemake.output[0]}"
" REFERENCE_SEQUENCE={snakemake.input.reference}"
" BAIT_INTERVALS={snakemake.input.bait_intervals}"
" TARGET_INTERVALS={snakemake.input.target_intervals}"
" {log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"picard CollectHsMetrics"
" {java_opts} {extra}"
" --INPUT {snakemake.input.bam}"
" --TMP_DIR {tmpdir}"
" --OUTPUT {snakemake.output[0]}"
" --REFERENCE_SEQUENCE {snakemake.input.reference}"
" --BAIT_INTERVALS {snakemake.input.bait_intervals}"
" --TARGET_INTERVALS {snakemake.input.target_intervals}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/picard/collectinsertsizemetrics/environment.yaml
Expand Up @@ -3,6 +3,6 @@ channels:
- conda-forge
- defaults
dependencies:
- picard ==2.22.1
- picard =2.26
- r-base ==3.6.2
- snakemake-wrapper-utils ==0.1.3
5 changes: 5 additions & 0 deletions bio/picard/collectinsertsizemetrics/meta.yaml
Expand Up @@ -8,3 +8,8 @@ input:
output:
- txt: textual representation of metrics
- pdf: insert size histogram
notes: |
* The `java_opts` param allows for additional arguments to be passed to the Java virtual machine (JVM), e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* `--TMP_DIR` is automatically set by `resources.tmpdir`
* For more information, see https://broadinstitute.github.io/picard/command-line-overview.html#CollectInsertSizeMetrics

0 comments on commit 55e672a

Please sign in to comment.