Skip to content

Commit

Permalink
feat: support branches (e.g. plants) in ensembl wrappers for sequence, annotation, and variation download (#546)
Browse files Browse the repository at this point in the history
  • Loading branch information
johanneskoester committed Aug 21, 2022
1 parent 67c11a3 commit 94d7f8e
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 9 deletions.
2 changes: 2 additions & 0 deletions bio/reference/ensembl-annotation/test/Snakefile
Expand Up @@ -6,6 +6,7 @@ rule get_annotation:
release="87",
build="GRCh37",
flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP.
# branch="plants", # optional: specify branch
log:
"logs/get_annotation.log",
cache: True # save space and time with between workflow caching (see docs)
Expand All @@ -21,6 +22,7 @@ rule get_annotation_gz:
release="87",
build="GRCh37",
flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP.
# branch="plants", # optional: specify branch
log:
"logs/get_annotation.log",
cache: True # save space and time with between workflow caching (see docs)
Expand Down
2 changes: 2 additions & 0 deletions bio/reference/ensembl-annotation/wrapper.py
Expand Up @@ -24,6 +24,8 @@
if release >= 81 and build == "GRCh37":
# use the special grch37 branch for new releases
branch = "grch37/"
elif snakemake.params.get("branch"):
branch = snakemake.params.branch + "/"


flavor = snakemake.params.get("flavor", "")
Expand Down
14 changes: 8 additions & 6 deletions bio/reference/ensembl-sequence/test/Snakefile
@@ -1,28 +1,30 @@
rule get_genome:
output:
"refs/genome.fasta"
"refs/genome.fasta",
params:
species="saccharomyces_cerevisiae",
datatype="dna",
build="R64-1-1",
release="98"
release="98",
log:
"logs/get_genome.log"
"logs/get_genome.log",
cache: True # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/reference/ensembl-sequence"


rule get_chromosome:
output:
"refs/chr1.fasta"
"refs/chr1.fasta",
params:
species="saccharomyces_cerevisiae",
datatype="dna",
build="R64-1-1",
release="101",
chromosome="I"
chromosome="I", # optional: restrict to chromosome
# branch="plants", # optional: specify branch
log:
"logs/get_genome.log"
"logs/get_genome.log",
cache: True # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/reference/ensembl-sequence"
2 changes: 2 additions & 0 deletions bio/reference/ensembl-sequence/wrapper.py
Expand Up @@ -16,6 +16,8 @@
if release >= 81 and build == "GRCh37":
# use the special grch37 branch for new releases
branch = "grch37/"
elif snakemake.params.get("branch"):
branch = snakemake.params.branch + "/"

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

Expand Down
1 change: 1 addition & 0 deletions bio/reference/ensembl-variation/test/Snakefile
Expand Up @@ -11,6 +11,7 @@ rule get_variation:
build="R64-1-1",
type="all", # one of "all", "somatic", "structural_variation"
# chromosome="21", # optionally constrain to chromosome, only supported for homo_sapiens
# branch="plants", # optional: specify branch
log:
"logs/get_variation.log",
cache: True # save space and time with between workflow caching (see docs)
Expand Down
9 changes: 6 additions & 3 deletions bio/reference/ensembl-variation/wrapper.py
Expand Up @@ -16,14 +16,17 @@
type = snakemake.params.type
chromosome = snakemake.params.get("chromosome", "")

if release < 98:
print("Ensembl releases <98 are unsupported.", file=open(snakemake.log[0], "w"))
exit(1)

branch = ""
if release >= 81 and build == "GRCh37":
# use the special grch37 branch for new releases
branch = "grch37/"
elif snakemake.params.get("branch"):
branch = snakemake.params.branch + "/"

if release < 98 and not branch:
print("Ensembl releases <98 are unsupported.", file=open(snakemake.log[0], "w"))
exit(1)

log = snakemake.log_fmt_shell(stdout=False, stderr=True)

Expand Down

0 comments on commit 94d7f8e

Please sign in to comment.