Skip to content

Commit

Permalink
feat: bwa-meme picard option (#558)
Browse files Browse the repository at this point in the history
* typo in rule naming, reactivate picard option, picard test

* wrongly configured test

* wrongly configured test again

* genome wildcard is now hardcoded in test

* wrapper path

* dont use other wrappers in test

* dont use other wrappers in test

* environment.yaml in test

* try again wrapper in test

* try index wrapper again

* use all rule

* version index wrapper

* remove index env

* used_wrappers.yaml

* used_wrappers.yaml

* swap for github action

* swap 10g

* swap 20g

* number of model parameters in index to decrease resource consumption for test

* revert swap size

* different approach, zipped file now

* snakefmt, black

* log for extraction

* conda for gzip to satisfy linter

* add the yaml

* mbuffer to 2g
  • Loading branch information
christopher-schroeder committed Oct 10, 2022
1 parent fee4ab1 commit 76174cf
Show file tree
Hide file tree
Showing 17 changed files with 91 additions and 48 deletions.
8 changes: 5 additions & 3 deletions bio/bwa-meme/index/test/Snakefile
@@ -1,4 +1,4 @@
rule bwa_mem2_index:
rule bwa_meme_index:
input:
"{genome}",
output:
Expand All @@ -12,10 +12,12 @@ rule bwa_mem2_index:
".suffixarray_uint64",
".suffixarray_uint64_L0_PARAMETERS",
".suffixarray_uint64_L1_PARAMETERS",
".suffixarray_uint64_L2_PARAMETERS"
)
".suffixarray_uint64_L2_PARAMETERS",
),
log:
"logs/bwa-meme_index/{genome}.log",
params: #[hide]
num_models=100000, #[hide]
threads: 8
wrapper:
"master/bio/bwa-meme/index"
10 changes: 9 additions & 1 deletion bio/bwa-meme/index/wrapper.py
Expand Up @@ -45,6 +45,14 @@ def get_valid_suffix(path):
raise ValueError("Output files must share common prefix up to their file endings.")
(prefix,) = prefixes

suffixarray = snakemake.input[0] + ".suffixarray_uint64"
dirname = path.dirname(suffixarray)
basename = path.basename(suffixarray)
num_models = snakemake.params.get("num_models", 268435456) # change only for testing!

if not dirname:
dirname = "."

shell(
"(bwa-meme index -a meme -p {prefix} {snakemake.input[0]} -t {snakemake.threads} && build_rmis_dna.sh {snakemake.input[0]}) {log}"
"(bwa-meme index -a meme -p {prefix} {snakemake.input[0]} -t {snakemake.threads} && bwa-meme-train-prmi -t {snakemake.threads} --data-path {dirname} {suffixarray} {basename} pwl,linear,linear_spline {num_models}) {log}"
)
3 changes: 2 additions & 1 deletion bio/bwa-meme/mem/environment.yaml
Expand Up @@ -6,4 +6,5 @@ dependencies:
- bwa-meme ==1.0.4
- samtools ==1.15
- samblaster == 0.1.26
- mbuffer == 20160228
- mbuffer == 20160228
- picard-slim =2.27
79 changes: 58 additions & 21 deletions bio/bwa-meme/mem/test/Snakefile
@@ -1,23 +1,24 @@
rule bwa_meme_index: #[hide]
input: #[hide]
"genome.fasta", #[hide]
output: #[hide]
"{genome}.0123", #[hide]
"{genome}.amb", #[hide]
"{genome}.ann", #[hide]
"{genome}.pac", #[hide]
"{genome}.pos_packed", #[hide]
"{genome}.suffixarray_uint64", #[hide]
"{genome}.suffixarray_uint64_L0_PARAMETERS", #[hide]
"{genome}.suffixarray_uint64_L1_PARAMETERS", #[hide]
"{genome}.suffixarray_uint64_L2_PARAMETERS", #[hide]
threads: 8 #[hide]
log: #[hide]
"logs/bwa_meme_index/{genome}.log", #[hide]
wrapper: #[hide]
"master/bio/bwa-meme/index" #[hide]
#[hide]
#[hide]
rule all: # [hide]
input: # [hide]
"mapped/a.cram", # [hide]
"mapped/a.picard.cram", # [hide]


rule L2_PARAMETERS_extract: #[hide]
input: #[hide]
"genome.fasta.suffixarray_uint64_L2_PARAMETERS.gz", #[hide]
output: #[hide]
temp("genome.fasta.suffixarray_uint64_L2_PARAMETERS"), #[hide]
conda:
"gzip.yaml"
log:
"log/extract.l2.log",
shell: #[hide]
"zcat {input} > {output} 2> {log}"


# [hide]
# [hide]
rule bwa_meme_mem:
input:
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"],
Expand All @@ -41,7 +42,7 @@ rule bwa_meme_mem:
"logs/bwa_meme/{sample}.log",
params:
extra=r"-R '@RG\tID:{sample}\tSM:{sample}' -M",
sort="samtools", # Can be 'none' or 'samtools'.
sort="samtools", # Can be 'none' or 'samtools or picard'.
sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'.
sort_extra="", # Extra args for samtools.
dedup="mark", # Can be 'none' (default), 'mark' or 'remove'.
Expand All @@ -51,3 +52,39 @@ rule bwa_meme_mem:
threads: 8
wrapper:
"master/bio/bwa-meme/mem"


# [hide]
# [hide]
rule bwa_meme_mem_picard: #[hide]
input: #[hide]
reads=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], #[hide]
reference="genome.fasta", #[hide]
idx=multiext(
"genome.fasta",
".0123",
".amb",
".ann",
".pac",
".pos_packed",
".suffixarray_uint64",
".suffixarray_uint64_L0_PARAMETERS",
".suffixarray_uint64_L1_PARAMETERS",
".suffixarray_uint64_L2_PARAMETERS",
),
output: #[hide]
"mapped/{sample}.picard.cram", #[hide]
log: #[hide]
"logs/bwa_meme/{sample}.log", #[hide]
params: #[hide]
extra=r"-R '@RG\tID:{sample}\tSM:{sample}' -M",
sort="picard", #[hide]
sort_order="coordinate", #[hide]
sort_extra="", #[hide]
dedup="mark", #[hide]
dedup_extra="-M", #[hide]
exceed_thread_limit=True, #[hide]
embed_ref=True, #[hide]
threads: 8 #[hide]
wrapper: #[hide]
"master/bio/bwa-meme/mem" #[hide]
Binary file added bio/bwa-meme/mem/test/genome.fasta.0123
Binary file not shown.
1 change: 1 addition & 0 deletions bio/bwa-meme/mem/test/genome.fasta.amb
@@ -0,0 +1 @@
20 1 0
3 changes: 3 additions & 0 deletions bio/bwa-meme/mem/test/genome.fasta.ann
@@ -0,0 +1,3 @@
20 1 11
0 Sheila (null)
0 20 0
1 change: 1 addition & 0 deletions bio/bwa-meme/mem/test/genome.fasta.fai
@@ -0,0 +1 @@
Sheila 20 8 20 21
Binary file added bio/bwa-meme/mem/test/genome.fasta.pac
Binary file not shown.
Binary file added bio/bwa-meme/mem/test/genome.fasta.pos_packed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 4 additions & 0 deletions bio/bwa-meme/mem/test/gzip.yaml
@@ -0,0 +1,4 @@
channels:
- conda-forge
dependencies:
- gzip ==1.12
25 changes: 7 additions & 18 deletions bio/bwa-meme/mem/wrapper.py
Expand Up @@ -77,13 +77,12 @@

# Sort alignments using samtools sort.

# elif sort == "picard":
# # Sort alignments using picard SortSam.
# pipe_cmd = (
# "picard SortSam {sort_extra} INPUT=/dev/stdin"
# " OUTPUT={snakemake.output[0]} SORT_ORDER={sort_order}"
# )

elif sort == "picard":
# Sort alignments using picard SortSam.
pipe_cmd = (
"picard SortSam {sort_extra} -I /dev/stdin"
" -O /dev/stdout -SO {sort_order} | samtools view -h -O {output_format} -o {snakemake.output[0]} -T {reference} -@ {samtools_threads} -"
)
else:
raise ValueError("Unexpected value for params.sort ({})".format(sort))

Expand All @@ -100,26 +99,16 @@
elif dedup == "remove":
dedup_cmd = "samblaster -q -r {dedup_extra} | "

# elif sort == "picard":
# # Sort alignments using picard SortSam.
# pipe_cmd = (
# "picard SortSam {sort_extra} INPUT=/dev/stdin"
# " OUTPUT={snakemake.output[0]} SORT_ORDER={sort_order}"
# )

else:
raise ValueError("Unexpected value for params.dedup ({})".format(dedup))


duplicates = snakemake.params.get("mark_duplicates", False)


shell(
"(bwa-meme mem -7"
" -t {snakemake.threads}"
" {extra}"
" {reference}"
" {snakemake.input.reads}"
" | mbuffer -q -m 10G "
" | mbuffer -q -m 2G "
" | " + dedup_cmd + pipe_cmd + ") {log}"
)
5 changes: 1 addition & 4 deletions test.py
Expand Up @@ -1643,16 +1643,13 @@ def test_bwa_mem2_sort_samtools():
@skip_if_not_modified
def test_bwa_meme():
run(
"bio/bwa-mem2/mem",
"bio/bwa-meme/mem",
[
"snakemake",
"--cores",
"1",
"mapped/a.cram",
"--use-conda",
"-F",
"-s",
"Snakefile_samtools",
],
)

Expand Down

0 comments on commit 76174cf

Please sign in to comment.