Skip to content

Commit

Permalink
feat: wrapper for Manta SV caller (#415)
Browse files Browse the repository at this point in the history
* Added Manta wrapper

* Added wrapper for Manta

* Alowed free specification of output files

* Added support for BCF output

* Fixed format

* Fixed typo

* Added bcftools to environment

* Added tempdir and fixed CRAM index issue

* Fixed input BAM file option

* Fixed symlink issue

* Added more descriptive notes

* Changed name to be consistent with other wrappers

* Made code more readable

* Reverted to manta BED file

* Added BED file to test

* Added sorted output

* Removed invalid sort option

* Increased max number of open files to avoid error

* Set to change soft limit

* Revert sort

* Allow specification of max number of open files

* Added default mem value

* Code format

* Removed max_n_open_files

* Code refactoring

* Fixed bug when concatenating path and str

* Removed redundant defaults
  • Loading branch information
fgvieira committed Jan 26, 2022
1 parent 2bb9131 commit 6b4d6a1
Show file tree
Hide file tree
Showing 11 changed files with 13,585 additions and 0 deletions.
7 changes: 7 additions & 0 deletions bio/manta/environment.yaml
@@ -0,0 +1,7 @@
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- manta=1.6
- bcftools=1.14
18 changes: 18 additions & 0 deletions bio/manta/meta.yaml
@@ -0,0 +1,18 @@
name: manta
description: |
Call structural variants with manta.
authors:
- Filipe G. Vieira
input:
- BAM/CRAM file(s)
- reference genome
- BED file (optional)
output:
- SVs and indels scored and genotyped under a diploid model (diploidSV.vcf.gz).
- Unfiltered SV and indel candidates (candidateSV.vcf.gz).
- Subset of the previous file containing only simple insertion and deletion variants less than the minimum scored variant size (candidateSmallIndels.vcf.gz).
notes: |
* The `extra_cfg` param allows for additional program arguments to `configManta.py`.
* The `extra_run` param allows for additional program arguments to `runWorkflow.py`.
* The `runDir` is created using pythons `tempfile`, meaning that all intermediate files are deleted on job completion
* For more information see, https://github.com/Illumina/manta
23 changes: 23 additions & 0 deletions bio/manta/test/Snakefile
@@ -0,0 +1,23 @@
rule manta:
input:
ref="human_g1k_v37_decoy.small.fasta",
samples=["mapped/a.bam"],
index=["mapped/a.bam.bai"],
bed="test.bed.gz", # optional
output:
vcf="results/out.bcf",
idx="results/out.bcf.csi",
cand_indel_vcf="results/small_indels.vcf.gz",
cand_indel_idx="results/small_indels.vcf.gz.tbi",
cand_sv_vcf="results/cand_sv.vcf.gz",
cand_sv_idx="results/cand_sv.vcf.gz.tbi",
params:
extra_cfg="", # optional
extra_run="", # optional
log:
"logs/manta.log",
threads: 2
resources:
mem_mb=4096,
wrapper:
"master/bio/manta"
13,426 changes: 13,426 additions & 0 deletions bio/manta/test/human_g1k_v37_decoy.small.fasta

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions bio/manta/test/human_g1k_v37_decoy.small.fasta.fai
@@ -0,0 +1,6 @@
1 200000 52 60 61
2 200000 203438 60 61
3 200000 406824 60 61
8 1282 610161 60 61
11 3696 611469 60 61
X 200000 615279 60 61
Binary file added bio/manta/test/mapped/a.bam
Binary file not shown.
Binary file added bio/manta/test/mapped/a.bam.bai
Binary file not shown.
Binary file added bio/manta/test/test.bed.gz
Binary file not shown.
Binary file added bio/manta/test/test.bed.gz.tbi
Binary file not shown.
100 changes: 100 additions & 0 deletions bio/manta/wrapper.py
@@ -0,0 +1,100 @@
__author__ = "Filipe G. Vieira"
__copyright__ = "Copyright 2021, Filipe G. Vieira"
__license__ = "MIT"


import math
from snakemake.shell import shell
from pathlib import Path
from tempfile import TemporaryDirectory


extra_cfg = snakemake.params.get("extra_cfg", "")
extra_run = snakemake.params.get("extra_run", "")

bed = snakemake.input.get("bed", "")
if bed:
bed = f"--callRegions {bed}"


mem_gb = snakemake.resources.get("mem_gb", "")
if not mem_gb:
# 20 Gb of mem by default
mem_gb = math.ceil(snakemake.resources.get("mem_mb", 20480) / 1024)

log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True)


with TemporaryDirectory() as tempdir:
tempdir = Path(tempdir)
run_dir = tempdir / "runDir"
bams = []

# Symlink BAM/CRAM files to avoid problems with filenames
for aln, idx in zip(snakemake.input.samples, snakemake.input.index):
aln = Path(aln)
idx = Path(idx)
(tempdir / aln.name).symlink_to(aln.resolve())
bams.append(tempdir / aln.name)

if idx.name.endswith(".bam.bai") or idx.name.endswith(".cram.crai"):
(tempdir / idx.name).symlink_to(idx.resolve())
if idx.name.endswith(".bai"):
(tempdir / idx.name).with_suffix(".bam.bai").symlink_to(idx.resolve())
elif idx.name.endswith(".crai"):
(tempdir / idx.name).with_suffix(".cram.crai").symlink_to(idx.resolve())
else:
raise ValueError(f"invalid index file name provided: {idx}")

bams = list(map("--normalBam {}".format, bams))

shell(
# Configure Manta
"configManta.py {extra_cfg} {bams} --referenceFasta {snakemake.input.ref} {bed} --runDir {run_dir} {log}; "
# Run Manta
"python2 {run_dir}/runWorkflow.py {extra_run} --jobs {snakemake.threads} --memGb {mem_gb} {log}; "
)

# Copy outputs into proper position.
def infer_vcf_ext(vcf):
if vcf.endswith(".vcf.gz"):
return "z"
elif vcf.endswith(".bcf"):
return "b"
else:
raise ValueError(
"invalid VCF extension. Only '.vcf.gz' and '.bcf' are supported."
)

def copy_vcf(origin_vcf, dest_vcf, dest_idx):
if dest_vcf and dest_vcf != origin_vcf:
dest_vcf_format = infer_vcf_ext(dest_vcf)
shell(
"bcftools view --threads {snakemake.threads} --output {dest_vcf:q} --output-type {dest_vcf_format} {origin_vcf:q} {log}"
)

origin_idx = str(origin_vcf) + ".tbi"
if dest_idx and dest_idx != origin_idx:
shell(
"bcftools index --threads {snakemake.threads} --output {dest_idx:q} {dest_vcf:q} {log}"
)

results_base = run_dir / "results" / "variants"

# Copy main VCF output
vcf_temp = results_base / "diploidSV.vcf.gz"
vcf_final = snakemake.output.get("vcf")
idx_final = snakemake.output.get("idx")
copy_vcf(vcf_temp, vcf_final, idx_final)

# Copy candidate small indels VCF
cand_indel_vcf_temp = results_base / "candidateSmallIndels.vcf.gz"
cand_indel_vcf_final = snakemake.output.get("cand_indel_vcf")
cand_indel_idx_final = snakemake.output.get("cand_indel_idx")
copy_vcf(cand_indel_vcf_temp, cand_indel_vcf_final, cand_indel_idx_final)

# Copy candidates structural variants VCF
cand_sv_vcf_temp = results_base / "candidateSV.vcf.gz"
cand_sv_vcf_final = snakemake.output.get("cand_sv_vcf")
cand_sv_idx_final = snakemake.output.get("cand_sv_idx")
copy_vcf(cand_sv_vcf_temp, cand_sv_vcf_final, cand_sv_idx_final)
5 changes: 5 additions & 0 deletions test.py
Expand Up @@ -2858,6 +2858,11 @@ def test_delly():
run("bio/delly", ["snakemake", "--cores", "1", "sv/calls.bcf", "--use-conda", "-F"])


@skip_if_not_modified
def test_manta():
run("bio/manta", ["snakemake", "--cores", "2", "results/out.bcf", "--use-conda", "-F"])


@skip_if_not_modified
def test_jannovar():
run(
Expand Down

0 comments on commit 6b4d6a1

Please sign in to comment.