Skip to content

Commit

Permalink
feat: convert gtf to gene pred output (#477)
Browse files Browse the repository at this point in the history
* Allow to convert output depending on param

* Added conversion check

Co-authored-by: Johannes Köster <johannes.koester@uni-due.de>

* Fixed default conversion

Co-authored-by: Johannes Köster <johannes.koester@uni-due.de>
  • Loading branch information
fgvieira and johanneskoester committed Apr 25, 2022
1 parent 42696c2 commit 4672e5b
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 11 deletions.
3 changes: 2 additions & 1 deletion bio/ucsc/gtfToGenePred/environment.yaml
Expand Up @@ -3,4 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
# Single pin for the converter; the stale duplicate `==377` entry is dropped.
- ucsc-gtftogenepred =377
# Provides the csvcut / csvformat tools used by the wrapper's output conversion.
- csvkit =1.0
5 changes: 5 additions & 0 deletions bio/ucsc/gtfToGenePred/meta.yaml
@@ -1,9 +1,14 @@
# Wrapper metadata: name, description, authorship and usage notes.
name: gtfToGenePred
description: |
  Convert a GTF file to genePred format (see https://genome.ucsc.edu/FAQ/FAQformat.html#format9)
url: https://hgdownload.cse.ucsc.edu/admin/exe/
authors:
  - Brett Copeland
  - Filipe G. Vieira
input:
  - GTF file
output:
  - genePred table
notes: |
  * The `extra` param allows for additional program arguments.
  * The `convert_out` param allows applying some conversions to the `refFlat` output. For example, if set to `PicardCollectRnaSeqMetrics` it makes it compatible with `Picard CollectRnaSeqMetrics` (this one also requires `extra` to be set to `-genePredExt -geneNameAsName2`).
23 changes: 19 additions & 4 deletions bio/ucsc/gtfToGenePred/test/Snakefile
@@ -1,12 +1,27 @@
# Convert a GTF annotation to genePred without any output conversion
# (no `convert_out` param is set for this rule).
rule gtfToGenePred:
    input:
        # annotations containing gene, transcript, exon, etc. data in GTF format
        "annotation.gtf",
    output:
        "annotation.genePred",
    log:
        "logs/gtfToGenePred.log",
    params:
        extra="-genePredExt",  # optional parameters to pass to gtfToGenePred
    wrapper:
        "master/bio/ucsc/gtfToGenePred"


# Convert a GTF annotation to genePred and ask the wrapper to post-process the
# table with the `PicardCollectRnaSeqMetrics` conversion mode.
rule gtfToGenePred_CollectRnaSeqMetrics:
    input:
        # annotations containing gene, transcript, exon, etc. data in GTF format
        "annotation.gtf",
    output:
        "annotation.PicardCollectRnaSeqMetrics.genePred",
    log:
        "logs/gtfToGenePred.PicardCollectRnaSeqMetrics.log",
    params:
        # conversion mode applied by the wrapper to the gtfToGenePred output
        convert_out="PicardCollectRnaSeqMetrics",
        extra="-genePredExt -geneNameAsName2",  # optional parameters to pass to gtfToGenePred
    wrapper:
        "master/bio/ucsc/gtfToGenePred"
23 changes: 18 additions & 5 deletions bio/ucsc/gtfToGenePred/wrapper.py
Expand Up @@ -3,13 +3,26 @@
__email__ = "brcopeland@ucsd.edu"
__license__ = "MIT"


import os


from snakemake.shell import shell


# Optional extra command-line arguments, forwarded verbatim to gtfToGenePred.
extra = snakemake.params.get("extra", "")
# Output conversion mode; "raw" leaves the gtfToGenePred output untouched.
convert_out = snakemake.params.get("convert_out", "raw")
# Only stderr is sent to the log: stdout carries the table itself and is
# redirected into the output file by the shell command below.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)


# Build the (possibly empty) shell pipeline appended after gtfToGenePred.
if convert_out == "raw":
    pipes = ""
elif convert_out == "PicardCollectRnaSeqMetrics":
    # Select column 12 followed by columns 1-10 from the tab-separated table,
    # then write the result back out as tab-separated text.
    pipes = " | csvcut -t -c 12,1-10 | csvformat -T"
else:
    raise ValueError(
        f"Unsupported conversion mode {convert_out}. Please check wrapper documentation."
    )


# gtfToGenePred writes to /dev/stdout so the optional conversion can be piped;
# the whole group is parenthesised so that {log} captures stderr of every stage.
shell(
    "(gtfToGenePred {extra} {snakemake.input} /dev/stdout {pipes} > {snakemake.output}) {log}"
)
10 changes: 9 additions & 1 deletion test.py
Expand Up @@ -4211,7 +4211,15 @@ def test_collectrnaseqmetrics():
def test_gtftogenepred():
    """Build `annotation.genePred` with the gtfToGenePred wrapper test workflow."""
    run(
        "bio/ucsc/gtfToGenePred",
        # The target file is named explicitly so only this rule's output is requested.
        ["snakemake", "--cores", "1", "annotation.genePred", "--use-conda", "-F"],
    )


@skip_if_not_modified
def test_gtftogenepred_picard_collectrnaseqmetrics():
    """Build the `PicardCollectRnaSeqMetrics`-converted genePred output with the wrapper test workflow."""
    run(
        "bio/ucsc/gtfToGenePred",
        [
            "snakemake",
            "--cores",
            "1",
            # Explicit target: the output of the rule that sets `convert_out`.
            "annotation.PicardCollectRnaSeqMetrics.genePred",
            "--use-conda",
            "-F",
        ],
    )


Expand Down

0 comments on commit 4672e5b

Please sign in to comment.