Skip to content

Commit

Permalink
perf: bwa mem2 version update and use wrapper utils (#553)
Browse files Browse the repository at this point in the history
* Version updates, added docs, code re-format

* Added stdout to log
  • Loading branch information
fgvieira committed Sep 17, 2022
1 parent 92579c4 commit 356ee4d
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 59 deletions.
2 changes: 1 addition & 1 deletion bio/bwa-mem2/index/environment.yaml
Expand Up @@ -3,4 +3,4 @@ channels:
- bioconda
- nodefaults
dependencies:
- bwa-mem2 ==2.2.1
- bwa-mem2 =2.2
7 changes: 6 additions & 1 deletion bio/bwa-mem2/index/meta.yaml
@@ -1,5 +1,10 @@
name: "bwa-mem2 index"
description: "Creates a bwa-mem2 index."
url: https://github.com/bwa-mem2/bwa-mem2
authors:
- Christopher Schröder
- Patrik Smeds
- Patrik Smeds
input:
- Reference genome (FASTA)
output:
- Indexed reference genome
3 changes: 1 addition & 2 deletions bio/bwa-mem2/index/wrapper.py
Expand Up @@ -4,10 +4,9 @@
__license__ = "MIT"

from os import path

from snakemake.shell import shell

log = snakemake.log_fmt_shell(stdout=False, stderr=True)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Check inputs/arguments.
if len(snakemake.input) == 0:
Expand Down
5 changes: 3 additions & 2 deletions bio/bwa-mem2/mem/environment.yaml
Expand Up @@ -3,6 +3,7 @@ channels:
- bioconda
- nodefaults
dependencies:
- bwa-mem2 ==2.2.1
- samtools ==1.12
- bwa-mem2 =2.2
- samtools =1.15
- picard-slim =2.27
- snakemake-wrapper-utils =0.5
10 changes: 10 additions & 0 deletions bio/bwa-mem2/mem/meta.yaml
@@ -1,6 +1,16 @@
name: "bwa-mem2"
description: Bwa-mem2 is the next version of the bwa-mem algorithm in bwa. It produces alignments identical to bwa and is ~1.3-3.1x faster depending on the use-case, dataset and the running machine. Optional sorting using samtools or picard.
url: https://github.com/bwa-mem2/bwa-mem2
authors:
- Christopher Schröder
- Johannes Köster
- Julian de Ruiter
input:
- FASTQ file(s)
- reference genome
output:
- SAM/BAM/CRAM file
notes: |
* The `extra` param allows for additional arguments for bwa-mem2.
* The `sort` param enables sorting, and can be either 'none', 'samtools' or 'picard'.
* The `sort_extra` param allows for extra arguments for samtools/picard.
49 changes: 23 additions & 26 deletions bio/bwa-mem2/mem/wrapper.py
Expand Up @@ -6,25 +6,29 @@
__license__ = "MIT"


import tempfile
from os import path

from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts
from snakemake_wrapper_utils.samtools import get_samtools_opts


# Extract arguments.
extra = snakemake.params.get("extra", "")

log = snakemake.log_fmt_shell(stdout=False, stderr=True)
sort = snakemake.params.get("sort", "none")
sort_order = snakemake.params.get("sort_order", "coordinate")
sort_extra = snakemake.params.get("sort_extra", "")
samtools_opts = get_samtools_opts(snakemake)
java_opts = get_java_opts(snakemake)


index = snakemake.input.get("index", "")
if isinstance(index, str):
index = path.splitext(snakemake.input.idx)[0]
else:
index = path.splitext(snakemake.input.idx[0])[0]

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Check inputs/arguments.
if not isinstance(snakemake.input.reads, str) and len(snakemake.input.reads) not in {
Expand All @@ -34,42 +38,35 @@
raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements")

if sort_order not in {"coordinate", "queryname"}:
raise ValueError("Unexpected value for sort_order ({})".format(sort_order))
raise ValueError(f"Unexpected value for sort_order ({sort_order})")

# Determine which pipe command to use for converting to bam or sorting.
if sort == "none":

# Simply convert to bam using samtools view.
pipe_cmd = "samtools view -Sbh -o {snakemake.output[0]} -"
pipe_cmd = "samtools view {samtools_opts}"

elif sort == "samtools":

# Sort alignments using samtools sort.
pipe_cmd = "samtools sort {sort_extra} -o {snakemake.output[0]} -"
pipe_cmd = "samtools sort {samtools_opts} {sort_extra} -T {tmpdir}"

# Add name flag if needed.
if sort_order == "queryname":
sort_extra += " -n"

prefix = path.splitext(snakemake.output[0])[0]
sort_extra += " -T " + prefix + ".tmp"

elif sort == "picard":

# Sort alignments using picard SortSam.
pipe_cmd = (
"picard SortSam {sort_extra} INPUT=/dev/stdin"
" OUTPUT={snakemake.output[0]} SORT_ORDER={sort_order}"
)
pipe_cmd = "picard SortSam {java_opts} {sort_extra} --INPUT /dev/stdin --TMP_DIR {tmpdir} --SORT_ORDER {sort_order} --OUTPUT {snakemake.output[0]}"

else:
raise ValueError("Unexpected value for params.sort ({})".format(sort))

shell(
"(bwa-mem2 mem"
" -t {snakemake.threads}"
" {extra}"
" {index}"
" {snakemake.input.reads}"
" | " + pipe_cmd + ") {log}"
)
raise ValueError(f"Unexpected value for params.sort ({sort})")


with tempfile.TemporaryDirectory() as tmpdir:
shell(
"(bwa-mem2 mem"
" -t {snakemake.threads}"
" {extra}"
" {index}"
" {snakemake.input.reads}"
" | " + pipe_cmd + ") {log}"
)
5 changes: 3 additions & 2 deletions bio/bwa/mem/environment.yaml
Expand Up @@ -3,6 +3,7 @@ channels:
- bioconda
- nodefaults
dependencies:
- bwa ==0.7.17
- samtools =1.12
- bwa =0.7
- samtools =1.15
- picard-slim =2.27
- snakemake-wrapper-utils =0.5
3 changes: 1 addition & 2 deletions bio/bwa/mem/meta.yaml
@@ -1,6 +1,7 @@
name: "bwa mem"
description: Map reads using bwa mem, with optional sorting using
samtools or picard.
url: http://bio-bwa.sourceforge.net/bwa.shtml
authors:
- Johannes Köster
- Julian de Ruiter
Expand All @@ -14,5 +15,3 @@ notes: |
* The `extra` param allows for additional arguments for bwa-mem.
* The `sorting` param enables sorting, and can be either 'none', 'samtools' or 'picard'.
* The `sort_extra` param allows for extra arguments for samtools/picard.
* The `tmp_dir` param defines the path to the temp dir.
* For more information, see http://bio-bwa.sourceforge.net/bwa.shtml
38 changes: 15 additions & 23 deletions bio/bwa/mem/wrapper.py
Expand Up @@ -4,18 +4,22 @@
__license__ = "MIT"


from os import path
import re
import tempfile
from os import path
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts
from snakemake_wrapper_utils.samtools import get_samtools_opts


# Extract arguments.
extra = snakemake.params.get("extra", "")

log = snakemake.log_fmt_shell(stdout=False, stderr=True)
sort = snakemake.params.get("sorting", "none")
sort_order = snakemake.params.get("sort_order", "coordinate")
sort_extra = snakemake.params.get("sort_extra", "")
samtools_opts = get_samtools_opts(snakemake)
java_opts = get_java_opts(snakemake)


index = snakemake.input.idx
if isinstance(index, str):
Expand All @@ -24,51 +28,39 @@
index = path.splitext(snakemake.input.idx[0])[0]


if re.search(r"-T\b", sort_extra) or re.search(r"--TMP_DIR\b", sort_extra):
sys.exit(
"You have specified temp dir (`-T` or `--TMP_DIR`) in params.sort_extra; this is automatically set from params.tmp_dir."
)

log = snakemake.log_fmt_shell(stdout=False, stderr=True)


# Check inputs/arguments.
if not isinstance(snakemake.input.reads, str) and len(snakemake.input.reads) not in {
1,
2,
}:
raise ValueError("input must have 1 (single-end) or " "2 (paired-end) elements")
raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements")


if sort_order not in {"coordinate", "queryname"}:
raise ValueError("Unexpected value for sort_order ({})".format(sort_order))


# Determine which pipe command to use for converting to bam or sorting.
if sort == "none":

# Simply convert to bam using samtools view.
pipe_cmd = "samtools view -Sbh -o {snakemake.output[0]} -"
pipe_cmd = "samtools view {samtools_opts}"

elif sort == "samtools":

# Add name flag if needed.
if sort_order == "queryname":
sort_extra += " -n"

# Sort alignments using samtools sort.
pipe_cmd = "samtools sort -T {tmp} {sort_extra} -o {snakemake.output[0]} -"
pipe_cmd = "samtools sort {samtools_opts} {sort_extra} -T {tmpdir}"

elif sort == "picard":

# Sort alignments using picard SortSam.
pipe_cmd = (
"picard SortSam {sort_extra} --INPUT /dev/stdin"
" --OUTPUT {snakemake.output[0]} --SORT_ORDER {sort_order} --TMP_DIR {tmp}"
)
pipe_cmd = "picard SortSam {java_opts} {sort_extra} --INPUT /dev/stdin --TMP_DIR {tmpdir} --SORT_ORDER {sort_order} --OUTPUT {snakemake.output[0]}"

else:
raise ValueError("Unexpected value for params.sort ({})".format(sort))
raise ValueError(f"Unexpected value for params.sort ({sort})")

with tempfile.TemporaryDirectory() as tmp:
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"(bwa mem"
" -t {snakemake.threads}"
Expand Down

0 comments on commit 356ee4d

Please sign in to comment.