From 8236e80794d0f9c9670238ba168770c0947e8379 Mon Sep 17 00:00:00 2001 From: Kim Date: Tue, 19 Apr 2022 12:12:00 +0200 Subject: [PATCH] feat: Allow paramspace to separate filename params with custom separator (#1299) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Allow paramspace to separate filename params with custom separator * Add test for 'filename_sep' * Add missing files * Use filename separator in tests which hopefully works better on windows Co-authored-by: Johannes Köster --- snakemake/utils.py | 18 ++++++-- tests/test_paramspace/Snakefile | 42 ++++++++++++++---- .../plots/alpha~1.0__beta~0.1__gamma~0.99.pdf | Bin .../plots/alpha~2.0__beta~0.0__gamma~3.9.pdf | Bin .../alpha~1.0__beta~0.1__gamma~0.99.tsv | 1 + .../alpha~2.0__beta~0.0__gamma~3.9.tsv | 1 + 6 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 tests/test_paramspace/expected-results/results/filenamesep/plots/alpha~1.0__beta~0.1__gamma~0.99.pdf create mode 100644 tests/test_paramspace/expected-results/results/filenamesep/plots/alpha~2.0__beta~0.0__gamma~3.9.pdf create mode 100644 tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~1.0__beta~0.1__gamma~0.99.tsv create mode 100644 tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~2.0__beta~0.0__gamma~3.9.tsv diff --git a/snakemake/utils.py b/snakemake/utils.py index c833e13ed..a846d028e 100644 --- a/snakemake/utils.py +++ b/snakemake/utils.py @@ -608,7 +608,7 @@ class Paramspace: By default, a directory structure with on folder level per parameter is created (e.g. column1~{column1}/column2~{column2}/***). - The exact behavior can be tweaked with two parameters: + The exact behavior can be tweaked with three parameters: - ``filename_params`` takes a list of column names of the passed dataframe. 
These names are used to build the filename (separated by '_') in the order @@ -622,16 +622,26 @@ class Paramspace: If ``filename_params="*"``, all columns of the dataframe are encoded into the filename instead of parent directories. - - ``param_sep`` takes a string that is used to join the column name and + - ``param_sep`` takes a string which is used to join the column name and column value in the generated paths (Default: '~'). Example: | ``Paramspace(df, param_sep=":")`` -> | column1:{value1}/column2:{value2}/column3:{value3}/column4:{value4} + + - ``filename_sep`` takes a string which is used to join the parameter + entries listed in ``filename_params`` in the generated paths + (Default: '_'). Example: + + | ``Paramspace(df, filename_params="*", filename_sep="-")`` -> + | column1~{value1}-column2~{value2}-column3~{value3}-column4~{value4} """ - def __init__(self, dataframe, filename_params=None, param_sep="~"): + def __init__( + self, dataframe, filename_params=None, param_sep="~", filename_sep="_" + ): self.dataframe = dataframe self.param_sep = param_sep + self.filename_sep = filename_sep if filename_params is None or not filename_params: # create a pattern of the form {}/{}/{} with one entry for each # column in the dataframe @@ -653,7 +663,7 @@ def __init__(self, dataframe, filename_params=None, param_sep="~"): self.pattern = "/".join( [r"{}"] * (len(self.dataframe.columns) - len(filename_params) + 1) ) - self.pattern = "_".join( + self.pattern = self.filename_sep.join( [self.pattern] + [r"{}"] * (len(filename_params) - 1) ) self.ordered_columns = [ diff --git a/tests/test_paramspace/Snakefile b/tests/test_paramspace/Snakefile index a42d50d0a..a71004814 100644 --- a/tests/test_paramspace/Snakefile +++ b/tests/test_paramspace/Snakefile @@ -2,34 +2,41 @@ from snakemake.utils import Paramspace import pandas as pd -# shold result in alpha~{alpha}/beta~{beta}/gamma~{gamma} +# should result in alpha~{alpha}/beta~{beta}/gamma~{gamma} paramspace_default = 
Paramspace(pd.read_csv("params.tsv", sep="\t")) -# shold result in alpha~{alpha}/beta~{beta}/gamma~{gamma} +# should result in alpha~{alpha}/beta~{beta}/gamma~{gamma} paramspace_empty = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=[]) -# shold result in alpha~{alpha}/gamma~{gamma}/beta~{beta} +# should result in alpha~{alpha}/gamma~{gamma}/beta~{beta} paramspace_one = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta"]) -# shold result in alpha~{alpha}/beta~{beta}_gamma~{gamma} +# should result in alpha~{alpha}/beta~{beta}_gamma~{gamma} paramspace_two = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta", "gamma"]) -# shold result in alpha~{alpha}_beta~{beta}_gamma~{gamma} +# should result in alpha~{alpha}_beta~{beta}_gamma~{gamma} paramspace_full = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["alpha", "beta", "gamma"]) -# shold result in beta~{beta}_gamma~{gamma}_alpha~{alpha} +# should result in beta~{beta}_gamma~{gamma}_alpha~{alpha} paramspace_full_reorder = Paramspace(pd.read_csv("params.tsv", sep="\t"), filename_params=["beta", "gamma", "alpha"]) -# shold result in alpha:{alpha}/beta:{beta}/gamma:{gamma} +# should result in alpha_is_{alpha}/beta_is_{beta}/gamma_is_{gamma} paramspace_sep = Paramspace(pd.read_csv("params.tsv", sep="\t"), param_sep="_is_") -# shold result in beta={beta}_gamma={gamma}_alpha={alpha} +# should result in beta={beta}_gamma={gamma}_alpha={alpha} paramspace_sep_and_pattern = Paramspace( pd.read_csv("params.tsv", sep="\t"), filename_params=["beta", "gamma", "alpha"], param_sep="=", ) +# should result in alpha~{alpha}__beta~{beta}__gamma~{gamma} +paramspace_filenamesep = Paramspace( + pd.read_csv("params.tsv", sep="\t"), + filename_params="*", + filename_sep="__", +) + rule all: input: @@ -41,6 +48,7 @@ rule all: expand("results/full_reorder/plots/{params}.pdf", params=paramspace_full_reorder.instance_patterns), expand("results/sep/plots/{params}.pdf", 
params=paramspace_sep.instance_patterns), expand("results/sep_and_pattern/plots/{params}.pdf", params=paramspace_sep_and_pattern.instance_patterns), + expand("results/filenamesep/plots/{params}.pdf", params=paramspace_filenamesep.instance_patterns), rule simulate_default: @@ -185,3 +193,21 @@ rule plot_sep_and_pattern: f"results/sep_and_pattern/plots/{paramspace_sep_and_pattern.wildcard_pattern}.pdf" shell: "touch {output}" + + +rule simulate_filenamesep: + output: + f"results/filenamesep/simulations/{paramspace_filenamesep.wildcard_pattern}.tsv" + params: + simulation=paramspace_filenamesep.instance + script: + "scripts/simulate.py" + + +rule plot_filenamesep: + input: + f"results/filenamesep/simulations/{paramspace_filenamesep.wildcard_pattern}.tsv" + output: + f"results/filenamesep/plots/{paramspace_filenamesep.wildcard_pattern}.pdf" + shell: + "touch {output}" diff --git a/tests/test_paramspace/expected-results/results/filenamesep/plots/alpha~1.0__beta~0.1__gamma~0.99.pdf b/tests/test_paramspace/expected-results/results/filenamesep/plots/alpha~1.0__beta~0.1__gamma~0.99.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/test_paramspace/expected-results/results/filenamesep/plots/alpha~2.0__beta~0.0__gamma~3.9.pdf b/tests/test_paramspace/expected-results/results/filenamesep/plots/alpha~2.0__beta~0.0__gamma~3.9.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~1.0__beta~0.1__gamma~0.99.tsv b/tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~1.0__beta~0.1__gamma~0.99.tsv new file mode 100644 index 000000000..4ba5e2159 --- /dev/null +++ b/tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~1.0__beta~0.1__gamma~0.99.tsv @@ -0,0 +1 @@ +{'alpha': 1.0, 'beta': 0.1, 'gamma': 0.99} diff 
--git a/tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~2.0__beta~0.0__gamma~3.9.tsv b/tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~2.0__beta~0.0__gamma~3.9.tsv new file mode 100644 index 000000000..0c2073ac0 --- /dev/null +++ b/tests/test_paramspace/expected-results/results/filenamesep/simulations/alpha~2.0__beta~0.0__gamma~3.9.tsv @@ -0,0 +1 @@ +{'alpha': 2.0, 'beta': 0.0, 'gamma': 3.9}