Skip to content

Commit

Permalink
feat: output aln is saved from stdout, to reduce I/O (#502)
Browse files Browse the repository at this point in the history
Co-authored-by: Johannes Köster <johannes.koester@uni-due.de>
  • Loading branch information
fgvieira and johanneskoester committed Aug 15, 2022
1 parent 3a20eb7 commit 6695486
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 28 deletions.
2 changes: 1 addition & 1 deletion bio/star/align/meta.yaml
@@ -1,5 +1,6 @@
name: "star"
description: Map reads with STAR.
url: https://github.com/alexdobin/STAR
authors:
- Johannes Köster
- Tomás Di Domenico
Expand All @@ -9,4 +10,3 @@ notes: |
* Consider raising the open-file limit before running STAR,
e.g. by executing `ulimit -n 10000`, to avoid an issue like this:
https://github.com/alexdobin/STAR/issues/1344
* For more information, see https://github.com/alexdobin/STAR
24 changes: 13 additions & 11 deletions bio/star/align/test/Snakefile
Expand Up @@ -5,15 +5,16 @@ rule star_pe_multi:
fq1=["reads/{sample}_R1.1.fastq", "reads/{sample}_R1.2.fastq"],
# paired-end reads need to be ordered so that each item in the two lists matches
fq2=["reads/{sample}_R2.1.fastq", "reads/{sample}_R2.2.fastq"], #optional
# path to STAR reference genome index
idx="index",
output:
# see STAR manual for additional output files
sam="star/pe/{sample}/Aligned.out.sam",
log="star/pe/{sample}/Log.out",
aln="star/pe/{sample}/pe_aligned.sam",
log="logs/pe/{sample}/Log.out",
sj="star/pe/{sample}/SJ.out.tab",
log:
"logs/star/pe/{sample}.log",
"logs/pe/{sample}.log",
params:
# path to STAR reference genome index
idx="index",
# optional parameters
extra="",
threads: 8
Expand All @@ -24,17 +25,18 @@ rule star_pe_multi:
rule star_se:
input:
fq1="reads/{sample}_R1.1.fastq",
# path to STAR reference genome index
idx="index",
output:
# see STAR manual for additional output files
sam="star/se/{sample}/Aligned.out.sam",
log="star/se/{sample}/Log.out",
aln="star/se/{sample}/se_aligned.bam",
log="logs/se/{sample}/Log.out",
log_final="logs/se/{sample}/Log.final.out",
log:
"logs/star/se/{sample}.log",
"logs/se/{sample}.log",
params:
# path to STAR reference genome index
idx="index",
# optional parameters
extra="",
extra="--outSAMtype BAM Unsorted",
threads: 8
wrapper:
"master/bio/star/align"
33 changes: 20 additions & 13 deletions bio/star/align/wrapper.py
Expand Up @@ -8,8 +8,10 @@
import tempfile
from snakemake.shell import shell


extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
log = snakemake.log_fmt_shell(stdout=False, stderr=True)


fq1 = snakemake.input.get("fq1")
assert fq1 is not None, "input-> fq1 is a required input parameter"
Expand All @@ -32,15 +34,28 @@
input_str_fq2 = ",".join(fq2) if fq2 is not None else ""
input_str = " ".join([input_str_fq1, input_str_fq2])


if fq1[0].endswith(".gz"):
readcmd = "--readFilesCommand zcat"
readcmd = "--readFilesCommand gunzip -c"
elif fq1[0].endswith(".bz2"):
readcmd = "--readFilesCommand bunzip2 -c"
else:
readcmd = ""


# Locate the STAR genome index: prefer the `idx` input and fall back to the
# `idx` param (an empty string when neither is provided).
index = snakemake.input.get("idx") or snakemake.params.get("idx", "")


# Choose which alignment stream STAR writes to stdout (passed on as --outStd),
# based on the `--outSAMtype` option found in the user-supplied `extra` string.
# The option's values are compared token by token, so tabs or repeated spaces
# between them no longer cause a silent fallback to "SAM" when BAM output was
# requested.
_extra_tokens = extra.split()
_sam_type = []
if "--outSAMtype" in _extra_tokens:
    # Collect the values that follow --outSAMtype, up to the next option.
    for _token in _extra_tokens[_extra_tokens.index("--outSAMtype") + 1 :]:
        if _token.startswith("--"):
            break
        _sam_type.append(_token)

if _sam_type[:2] == ["BAM", "SortedByCoordinate"]:
    stdout = "BAM_SortedByCoordinate"
elif _sam_type[:2] == ["BAM", "Unsorted"]:
    stdout = "BAM_Unsorted"
else:
    # No recognised BAM request: stream SAM on stdout.
    stdout = "SAM"


with tempfile.TemporaryDirectory() as tmpdir:
shell(
"STAR "
Expand All @@ -49,21 +64,13 @@
" --readFilesIn {input_str}"
" {readcmd}"
" {extra}"
" --outTmpDir {tmpdir}/temp"
" --outTmpDir {tmpdir}/STARtmp"
" --outFileNamePrefix {tmpdir}/"
" --outStd Log "
" --outStd {stdout}"
" > {snakemake.output.aln}"
" {log}"
)

if "SortedByCoordinate" in extra:
bamprefix = "Aligned.sortedByCoord.out"
else:
bamprefix = "Aligned.out"

if snakemake.output.get("bam"):
shell("cat {tmpdir}/{bamprefix}.bam > {snakemake.output.bam:q}")
if snakemake.output.get("sam"):
shell("cat {tmpdir}/{bamprefix}.sam > {snakemake.output.sam:q}")
if snakemake.output.get("reads_per_gene"):
shell("cat {tmpdir}/ReadsPerGene.out.tab > {snakemake.output.reads_per_gene:q}")
if snakemake.output.get("chim_junc"):
Expand Down
2 changes: 1 addition & 1 deletion meta/bio/star_arriba/test/Snakefile
Expand Up @@ -23,7 +23,7 @@ rule star_align:
idx="resources/star_genome",
output:
# see STAR manual for additional output files
bam="star/{sample}/Aligned.out.bam",
aln="star/{sample}/Aligned.out.bam",
reads_per_gene="star/{sample}/ReadsPerGene.out.tab",
log:
"logs/star/{sample}.log",
Expand Down
4 changes: 2 additions & 2 deletions test.py
Expand Up @@ -2718,11 +2718,11 @@ def test_star_align():

run(
"bio/star/align",
["snakemake", "--cores", "1", "star/se/a/Aligned.out.sam", "--use-conda", "-F"],
["snakemake", "--cores", "1", "star/se/a/se_aligned.bam", "--use-conda", "-F"],
)
run(
"bio/star/align",
["snakemake", "--cores", "1", "star/pe/a/Aligned.out.sam", "--use-conda", "-F"],
["snakemake", "--cores", "1", "star/pe/a/pe_aligned.sam", "--use-conda", "-F"],
)


Expand Down

0 comments on commit 6695486

Please sign in to comment.