Skip to content

Commit

Permalink
feat: added wrapper for bgzip (#195)
Browse files Browse the repository at this point in the history
* added bgzip compression

* reformatted with snakefmt, added test case

* bgzip test case updated to look for test.vcf.gz

* bgzip works in place by default

* replaced test with minimal vcf from https://www.internationalgenome.org

* default behavior is to send output to stdout

* incremented to v1.12
  • Loading branch information
williamrowell committed Jan 26, 2022
1 parent 0e323b1 commit 6d837f2
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 0 deletions.
5 changes: 5 additions & 0 deletions bio/bgzip/environment.yaml
@@ -0,0 +1,5 @@
channels:
- bioconda
- conda-forge
dependencies:
- htslib ==1.12
9 changes: 9 additions & 0 deletions bio/bgzip/meta.yaml
@@ -0,0 +1,9 @@
name: bgzip
description: Block compression/decompression utility
url: https://github.com/samtools/htslib
authors:
- William Rowell
input:
- file to be compressed or decompressed
output:
- compressed or decompressed output
12 changes: 12 additions & 0 deletions bio/bgzip/test/Snakefile
@@ -0,0 +1,12 @@
# Example rule: run the bgzip wrapper on "{prefix}.vcf" to produce "{prefix}.vcf.gz".
rule bgzip:
    input:
        "{prefix}.vcf",
    output:
        "{prefix}.vcf.gz",
    params:
        # Additional command-line arguments handed to the wrapper.
        extra="",  # optional
    threads: 1
    log:
        "logs/bgzip/{prefix}.log",
    wrapper:
        "master/bio/bgzip"
23 changes: 23 additions & 0 deletions bio/bgzip/test/test.vcf
@@ -0,0 +1,23 @@
##fileformat=VCFv4.0
##fileDate=20090805
##source=https://www.internationalgenome.org/wiki/Analysis/vcf4.0/
##reference=1000GenomesPilot-NCBI36
##phasing=partial
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3
20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4
20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
20 1234567 microsat1 GTCT G,GTACT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3
17 changes: 17 additions & 0 deletions bio/bgzip/wrapper.py
@@ -0,0 +1,17 @@
__author__ = "William Rowell"
__copyright__ = "Copyright 2020, William Rowell"
__email__ = "wrowell@pacb.com"
__license__ = "MIT"


from snakemake.shell import shell

# Optional extra bgzip arguments taken from the rule's `params.extra`;
# falls back to an empty string when the rule does not define it.
extra = snakemake.params.get("extra", "")

# Shell redirection fragment built by Snakemake for the rule's log file.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# bgzip is run with `-c`, and its stdout is redirected to the rule's output
# inside the subshell; the log redirection is applied to the subshell as a
# whole. The `:q` format spec shell-quotes the input and output paths so that
# file names containing spaces or shell metacharacters reach bgzip intact.
# `extra` is deliberately left unquoted because it may hold several arguments.
shell(
    """
    (bgzip -c {extra} --threads {snakemake.threads} \
        {snakemake.input:q} > {snakemake.output:q}) {log}
    """
)
8 changes: 8 additions & 0 deletions test.py
Expand Up @@ -919,6 +919,14 @@ def test_bedtools_slop():
)


@skip_if_not_modified
def test_bgzip():
    """Exercise the bio/bgzip wrapper by requesting test.vcf.gz from its test workflow."""
    snakemake_cmd = [
        "snakemake",
        "--cores",
        "1",
        "test.vcf.gz",
        "--use-conda",
        "-F",
    ]
    run("bio/bgzip", snakemake_cmd)


@skip_if_not_modified
def test_blast_makeblastdb_nucleotide():
run(
Expand Down

0 comments on commit 6d837f2

Please sign in to comment.