Skip to content

Commit

Permalink
feat: Wrapper and meta-wrapper for rbt consensus reads (#544)
Browse files Browse the repository at this point in the history
* calc consensus wrapper

* Add meta wrapper

* snakefmt

* Hide rule

* Format wrapper

* move function

* rename

* Add test

* Update dir

* Update tests

* Update paths

* cleanup

* fix typos

* Update environment.yaml

* restructuring

* fix filenames and rule order

Co-authored-by: Johannes Köster <johannes.koester@uni-due.de>
  • Loading branch information
FelixMoelder and johanneskoester committed Aug 29, 2022
1 parent aa92df8 commit 6736211
Show file tree
Hide file tree
Showing 16 changed files with 778,720 additions and 0 deletions.
6 changes: 6 additions & 0 deletions bio/rbt/collapse_reads_to_fragments-bam/environment.yaml
@@ -0,0 +1,6 @@
channels:
- bioconda
- conda-forge
- nodefaults
dependencies:
- rust-bio-tools =0.41
6 changes: 6 additions & 0 deletions bio/rbt/collapse_reads_to_fragments-bam/meta.yaml
@@ -0,0 +1,6 @@
name: rbt collapse-reads-to-fragments bam
description: |
Calculate consensus reads from read groups marked by PicardTools MarkDuplicates or UmiAwareMarkDuplicatesWithMateCigar.
url: https://github.com/rust-bio/rust-bio-tools
authors:
- Felix Mölder
19 changes: 19 additions & 0 deletions bio/rbt/collapse_reads_to_fragments-bam/test/Snakefile
@@ -0,0 +1,19 @@
rule all: # [hide]
input: # [hide]
"results/consensus/sampleA.1.fq", # [hide]


rule calc_consensus_reads:
input:
"mapped/{sample}.marked.bam",
output:
consensus_r1="results/consensus/{sample}.1.fq",
consensus_r2="results/consensus/{sample}.2.fq",
consensus_se="results/consensus/{sample}.se.fq",
skipped="results/consensus/{sample}.skipped.bam",
params:
extra="--verbose-read-names",
log:
"logs/consensus/{sample}.log",
wrapper:
"master/bio/rbt/collapse_reads_to_fragments-bam"
Binary file not shown.
14 changes: 14 additions & 0 deletions bio/rbt/collapse_reads_to_fragments-bam/wrapper.py
@@ -0,0 +1,14 @@
__author__ = "Felix Mölder"
__copyright__ = "Copyright 2022, Felix Mölder"
__email__ = "felix.moelder@uk-essen.de"
__license__ = "MIT"

from snakemake.shell import shell

log = snakemake.log_fmt_shell(stdout=True, stderr=True)

extra = snakemake.params.extra

shell(
"rbt collapse-reads-to-fragments bam {extra} {snakemake.input[0]} {snakemake.output} {log}"
)
5 changes: 5 additions & 0 deletions meta/bio/rbt_calc_consensus/meta.yaml
@@ -0,0 +1,5 @@
name: calc_consensus_reads
description: |
Performs consensus read calculation on a marked bam and merges single,
paired and skipped-consensus reads into a single sorted bam file.
author: Felix Mölder
91 changes: 91 additions & 0 deletions meta/bio/rbt_calc_consensus/test/Snakefile
@@ -0,0 +1,91 @@
rule calc_consensus_reads:
input:
# sorted bam file
"mapped/{sample}.marked.bam",
output:
# non-overlapping consensus read pairs will be written into consensus_r1 and consensus_r2
consensus_r1=temp("results/consensus_reads/{sample}.1.fq"),
consensus_r2=temp("results/consensus_reads/{sample}.2.fq"),
# consensus reads from single end records or overlapping read pairs will be merged into a single end record
consensus_se=temp("results/consensus_reads/{sample}.se.fq"),
# skipped reads (soft-clipped or unpropper mapped reads) will be skipped and unmarked
skipped=temp("results/consensus_reads/{sample}.skipped.bam"),
params:
extra="",
log:
"logs/consensus/{sample}.log",
wrapper:
"master/bio/rbt/collapse_reads_to_fragments-bam"


rule map_consensus_reads:
input:
reads=lambda wc: expand(
"results/consensus_reads/{sample}.{read}.fq",
sample=wc.sample,
read="se" if wc.read_type == "se" else (1, 2),
),
idx=multiext("resources/genome.fa", ".amb", ".ann", ".bwt", ".pac", ".sa"),
output:
temp("results/consensus_mapped/{sample}.{read_type}.bam"),
params:
index=lambda w, input: os.path.splitext(input.idx[0])[0],
sort="samtools",
sort_order="coordinate",
wildcard_constraints:
read_type="pe|se",
log:
"logs/bwa_mem/{sample}.{read_type}.consensus.log",
threads: 8
wrapper:
"master/bio/bwa/mem"


rule sort_skipped_reads:
input:
"results/consensus_reads/{sample}.skipped.bam",
output:
temp("results/consensus_reads/{sample}.skipped.sorted.bam"),
params:
extra="-m 4G",
tmp_dir="/tmp/",
log:
"logs/sort_consensus/{sample}.log",
# Samtools takes additional threads through its option -@
threads: 8 # This value - 1 will be sent to -@.
wrapper:
"master/bio/samtools/sort"


rule mark_duplicates_skipped:
input:
bams=["results/consensus_reads/{sample}.skipped.sorted.bam"],
output:
bam=temp("results/consensus_dupmarked/{sample}.skipped.marked.bam"),
metrics="results/consensus_dupmarked/{sample}.skipped.metrics.txt",
log:
"logs/picard/marked/{sample}.log",
params:
extra="--VALIDATION_STRINGENCY LENIENT --TAG_DUPLICATE_SET_MEMBERS true",
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024,
wrapper:
"master/bio/picard/markduplicates"


rule merge_consensus_reads:
input:
"results/consensus_dupmarked/{sample}.skipped.marked.bam",
"results/consensus_mapped/{sample}.se.bam",
"results/consensus_mapped/{sample}.pe.bam",
output:
"results/consensus/{sample}.bam",
log:
"logs/samtools_merge/{sample}.log",
threads: 8
wrapper:
"master/bio/samtools/merge"
Binary file not shown.

0 comments on commit 6736211

Please sign in to comment.