Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Allow paramspace to separate filename params with custom separator #1299

Merged
merged 5 commits into from Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 14 additions & 4 deletions snakemake/utils.py
Expand Up @@ -608,7 +608,7 @@ class Paramspace:
By default, a directory structure with one folder level per parameter is created
(e.g. column1~{column1}/column2~{column2}/***).

The exact behavior can be tweeked with two parameters:
The exact behavior can be tweaked with three parameters:

- ``filename_params`` takes a list of column names of the passed dataframe.
These names are used to build the filename (separated by ``filename_sep``, '_' by default) in the order
Expand All @@ -623,15 +623,25 @@ class Paramspace:
the filename instead of parent directories.

- ``param_sep`` takes a string which is used to join the column name and
column value in the genrated paths (Default: '~'). Example:
column value in the generated paths (Default: '~'). Example:

| ``Paramspace(df, param_sep=":")`` ->
| column1:{value1}/column2:{value2}/column3:{value3}/column4:{value4}

- ``filename_sep`` takes a string which is used to join the parameter
entries listed in ``filename_params`` in the generated paths
(Default: '_'). Example:

| ``Paramspace(df, filename_params="*", filename_sep="-")`` ->
| column1~{value1}-column2~{value2}-column3~{value3}-column4~{value4}
"""

def __init__(self, dataframe, filename_params=None, param_sep="~"):
def __init__(
self, dataframe, filename_params=None, param_sep="~", filename_sep="_"
):
self.dataframe = dataframe
self.param_sep = param_sep
self.filename_sep = filename_sep
if filename_params is None or not filename_params:
# create a pattern of the form {}/{}/{} with one entry for each
# column in the dataframe
Expand All @@ -653,7 +663,7 @@ def __init__(self, dataframe, filename_params=None, param_sep="~"):
self.pattern = "/".join(
[r"{}"] * (len(self.dataframe.columns) - len(filename_params) + 1)
)
self.pattern = "_".join(
self.pattern = self.filename_sep.join(
[self.pattern] + [r"{}"] * (len(filename_params) - 1)
)
self.ordered_columns = [
Expand Down
42 changes: 34 additions & 8 deletions tests/test_paramspace/Snakefile
Expand Up @@ -2,34 +2,41 @@ from snakemake.utils import Paramspace
import pandas as pd


# shold result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
# should result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
paramspace_default = Paramspace(pd.read_csv("params.tsv", sep="\t"))

# shold result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
# should result in alpha~{alpha}/beta~{beta}/gamma~{gamma}
paramspace_empty = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=[])

# shold result in alpha~{alpha}/gamma~{gamma}/beta~{beta}
# should result in alpha~{alpha}/gamma~{gamma}/beta~{beta}
paramspace_one = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta"])

# shold result in alpha~{alpha}/beta~{beta}_gamma~{gamma}
# should result in alpha~{alpha}/beta~{beta}_gamma~{gamma}
paramspace_two = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta", "gamma"])

# shold result in alpha~{alpha}_beta~{beta}_gamma~{gamma}
# should result in alpha~{alpha}_beta~{beta}_gamma~{gamma}
paramspace_full = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["alpha", "beta", "gamma"])

# shold result in beta~{beta}_gamma~{gamma}_alpha~{alpha}
# should result in beta~{beta}_gamma~{gamma}_alpha~{alpha}
paramspace_full_reorder = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta", "gamma", "alpha"])

# shold result in alpha:{alpha}/beta:{beta}/gamma:{gamma}
# should result in alpha_is_{alpha}/beta_is_{beta}/gamma_is_{gamma}
paramspace_sep = Paramspace(pd.read_csv("params.tsv", sep="\t"), param_sep="_is_")

# shold result in beta={beta}_gamma={gamma}_alpha={alpha}
# should result in beta={beta}_gamma={gamma}_alpha={alpha}
paramspace_sep_and_pattern = Paramspace(
pd.read_csv("params.tsv", sep="\t"),
filename_params=["beta", "gamma", "alpha"],
param_sep="=",
)

# should result in alpha~{alpha}__beta~{beta}__gamma~{gamma}
paramspace_filenamesep = Paramspace(
pd.read_csv("params.tsv", sep="\t"),
filename_params="*",
filename_sep="__",
)


rule all:
input:
Expand All @@ -41,6 +48,7 @@ rule all:
expand("results/full_reorder/plots/{params}.pdf", params=paramspace_full_reorder.instance_patterns),
expand("results/sep/plots/{params}.pdf", params=paramspace_sep.instance_patterns),
expand("results/sep_and_pattern/plots/{params}.pdf", params=paramspace_sep_and_pattern.instance_patterns),
expand("results/filenamesep/plots/{params}.pdf", params=paramspace_filenamesep.instance_patterns),


rule simulate_default:
Expand Down Expand Up @@ -185,3 +193,21 @@ rule plot_sep_and_pattern:
f"results/sep_and_pattern/plots/{paramspace_sep_and_pattern.wildcard_pattern}.pdf"
shell:
"touch {output}"


rule simulate_filenamesep:
output:
f"results/filenamesep/simulations/{paramspace_filenamesep.wildcard_pattern}.tsv"
params:
simulation=paramspace_filenamesep.instance
script:
"scripts/simulate.py"


rule plot_filenamesep:
input:
f"results/filenamesep/simulations/{paramspace_filenamesep.wildcard_pattern}.tsv"
output:
f"results/filenamesep/plots/{paramspace_filenamesep.wildcard_pattern}.pdf"
shell:
"touch {output}"
@@ -0,0 +1 @@
{'alpha': 1.0, 'beta': 0.1, 'gamma': 0.99}
@@ -0,0 +1 @@
{'alpha': 2.0, 'beta': 0.0, 'gamma': 3.9}