diff --git a/docs/snakefiles/deployment.rst b/docs/snakefiles/deployment.rst index 4e67cefed..7da0a1547 100644 --- a/docs/snakefiles/deployment.rst +++ b/docs/snakefiles/deployment.rst @@ -115,6 +115,44 @@ For example, we can easily add another rule to extend the given workflow: "notebooks/plot-vafs.py.ipynb" Moreover, it is possible to further extend the workflow with other modules, thereby generating an integrative analysis. +Here, let us assume that we want to conduct another kind of analysis, say RNA-seq, using a different external workflow. +We can extend the above example in the following way: + +.. code-block:: python + + from snakemake.utils import min_version + min_version("6.0") + + configfile: "config/config.yaml" + + module dna_seq: + snakefile: + github("snakemake-workflows/dna-seq-gatk-variant-calling", path="workflow/Snakefile", tag="v2.0.1") + config: config["dna-seq"] + prefix: "dna-seq" + + use rule * from dna_seq as dna_seq_* + + rule plot_vafs: + input: + "filtered/all.vcf.gz" + output: + "results/plots/vafs.svg" + notebook: + "notebooks/plot-vafs.py.ipynb" + + module rna_seq: + snakefile: + github("snakemake-workflows/rna-seq-kallisto-sleuth", path="workflow/Snakefile", tag="v2.0.1") + config: config["rna-seq"] + prefix: "rna-seq" + + use rule * from rna_seq as rna_seq_* + +Above, several things have changed. First, we have added another module ``rna_seq``. +Second, we have added a prefix to all rule names of both modules (``dna_seq_*`` and ``rna_seq_*`` in the ``use rule`` statements) in order to avoid rule name clashes. +Third, we have added a prefix to all non-absolute input and output file names of both modules (``prefix: "dna-seq"`` and ``prefix: "rna-seq"``) in order to avoid file name clashes. +Finally, we provide the config of the two modules via two separate sections in the common config file (``config["dna-seq"]`` and ``config["rna-seq"]``). 
---------------------------------- Uploading workflows to WorkflowHub diff --git a/docs/snakefiles/modularization.rst b/docs/snakefiles/modularization.rst index 128ee9e23..5593f189b 100644 --- a/docs/snakefiles/modularization.rst +++ b/docs/snakefiles/modularization.rst @@ -159,6 +159,8 @@ It is possible to overwrite the global config dictionary for the module, which i In this case, any ``configfile`` statements inside the module are ignored. In addition, it is possible to skip any :ref:`validation ` statements in the module, by specifying ``skip_validation: True`` in the module statment. +Moreover, one can automatically move all relative input and output files of a module into a dedicated folder: by specifying ``prefix: "foo"`` in the module definition, e.g. any output file ``path/to/output.txt`` in the module would be stored under ``foo/path/to/output.txt`` instead. +This becomes particularly useful when combining multiple modules, see :ref:`use_with_modules`. Instead of using all rules, it is possible to import specific rules. Specific rules may even be modified before using them, via a final ``with:`` followed by a block that lists items to overwrite. @@ -186,7 +188,7 @@ Note that the second use statement has to use the original rule name, not the on In order to overwrite the rule ``some_task`` that has been imported with the first ``use rule`` statement, it is crucial to ensure that the rule is used with the same name in the second statement, by adding an equivalent ``as`` clause (here ``other_some_task``). Otherwise, you will have two versions of the same rule, which might be unintended (a common symptom of such unintended repeated uses would be ambiguous rule exceptions thrown by Snakemake). -Of course, it is possible to combine the use of rules from multiple modules, and via modifying statements they can be rewired and reconfigured in an arbitrary way. 
+Of course, it is possible to combine the use of rules from multiple modules (see :ref:`use_with_modules`), and via modifying statements they can be rewired and reconfigured in an arbitrary way. .. _snakefiles-meta-wrappers: diff --git a/snakemake/common/__init__.py b/snakemake/common/__init__.py index abb31b5ab..9f85c5a40 100644 --- a/snakemake/common/__init__.py +++ b/snakemake/common/__init__.py @@ -38,7 +38,6 @@ def async_run(coroutine): loop = asyncio.get_event_loop() return loop.run_until_complete(coroutine) - else: def async_run(coroutine): diff --git a/snakemake/io.py b/snakemake/io.py index b4d192aa6..ca60a8111 100755 --- a/snakemake/io.py +++ b/snakemake/io.py @@ -96,7 +96,6 @@ def lutime(f, times): def lchmod(f, mode): os.chmod(f, mode, follow_symlinks=False) - else: def lchmod(f, mode): diff --git a/snakemake/modules.py b/snakemake/modules.py index 453747694..37164897a 100644 --- a/snakemake/modules.py +++ b/snakemake/modules.py @@ -3,6 +3,7 @@ __email__ = "johannes.koester@uni-due.de" __license__ = "MIT" +from pathlib import Path import types import re @@ -43,6 +44,7 @@ def __init__( config=None, skip_validation=False, replace_prefix=None, + prefix=None, ): self.workflow = workflow self.name = name @@ -50,7 +52,22 @@ def __init__( self.meta_wrapper = meta_wrapper self.config = config self.skip_validation = skip_validation + + if prefix is not None: + if isinstance(prefix, Path): + prefix = str(prefix) + if not isinstance(prefix, str): + raise WorkflowError( + "Prefix definition in module statement must be string or Path." + ) + if replace_prefix is not None: + raise WorkflowError( + "Module definition contains both prefix and replace_prefix. " + "Only one at a time is allowed." 
+ ) + self.replace_prefix = replace_prefix + self.prefix = prefix def use_rules(self, rules=None, name_modifier=None, ruleinfo=None): snakefile = self.get_snakefile() @@ -66,6 +83,7 @@ def use_rules(self, rules=None, name_modifier=None, ruleinfo=None): allow_rule_overwrite=True, namespace=self.name, replace_prefix=self.replace_prefix, + prefix=self.prefix, replace_wrapper_tag=self.get_wrapper_tag(), ): self.workflow.include(snakefile, overwrite_first_rule=True) @@ -116,6 +134,7 @@ def __init__( ruleinfo_overwrite=None, allow_rule_overwrite=False, replace_prefix=None, + prefix=None, replace_wrapper_tag=None, namespace=None, ): @@ -134,7 +153,7 @@ def __init__( self.rule_whitelist = rule_whitelist self.ruleinfo_overwrite = ruleinfo_overwrite self.allow_rule_overwrite = allow_rule_overwrite - self.path_modifier = PathModifier(replace_prefix, workflow) + self.path_modifier = PathModifier(replace_prefix, prefix, workflow) self.replace_wrapper_tag = replace_wrapper_tag self.namespace = namespace diff --git a/snakemake/parser.py b/snakemake/parser.py index d0546769f..9e0163926 100644 --- a/snakemake/parser.py +++ b/snakemake/parser.py @@ -819,6 +819,10 @@ class ModuleSnakefile(ModuleKeywordState): pass +class ModulePrefix(ModuleKeywordState): + pass + + class ModuleMetaWrapper(ModuleKeywordState): @property def keyword(self): @@ -848,6 +852,7 @@ class Module(GlobalKeywordState): config=ModuleConfig, skip_validation=ModuleSkipValidation, replace_prefix=ModuleReplacePrefix, + prefix=ModulePrefix, ) def __init__(self, snakefile, base_indent=0, dedent=0, root=True): diff --git a/snakemake/path_modifier.py b/snakemake/path_modifier.py index a1de3e860..59604cf19 100644 --- a/snakemake/path_modifier.py +++ b/snakemake/path_modifier.py @@ -12,11 +12,17 @@ class PathModifier: - def __init__(self, replace_prefix: dict, workflow): + def __init__(self, replace_prefix: dict, prefix: str, workflow): self.skip_properties = set() self.workflow = workflow self.trie = None + self.prefix = 
None + assert not (prefix and replace_prefix) + if prefix: + if not prefix.endswith("/"): + prefix += "/" + self.prefix = prefix if replace_prefix: import datrie @@ -46,25 +52,33 @@ def modify(self, path, property=None): return modified_path def replace_prefix(self, path, property=None): - if self.trie is None or property in self.skip_properties: + if ( + self.trie is None and self.prefix is None + ) or property in self.skip_properties: # no replacement return path - prefixes = self.trie.prefix_items(str(path)) - if len(prefixes) > 1: - # ambiguous prefixes - raise WorkflowError( - "Multiple prefixes ({}) match the path {}. Make sure that the replace_prefix statement " - "in your module definition does not yield ambiguous matches.".format( - ", ".join(prefix[0] for prefix in prefixes), path + if self.trie is not None: + prefixes = self.trie.prefix_items(str(path)) + if len(prefixes) > 1: + # ambiguous prefixes + raise WorkflowError( + "Multiple prefixes ({}) match the path {}. Make sure that the replace_prefix statement " + "in your module definition does not yield ambiguous matches.".format( + ", ".join(prefix[0] for prefix in prefixes), path + ) ) - ) - elif prefixes: - # replace prefix - prefix, replacement = prefixes[0] - return replacement + path[len(prefix) :] - else: - # no matching prefix + elif prefixes: + # replace prefix + prefix, replacement = prefixes[0] + return replacement + path[len(prefix) :] + else: + # no matching prefix + return path + # prefix case + if os.path.isabs(path) or path.startswith(".."): + # do not apply prefix if path is not within the workdir return path + return self.prefix + path def apply_default_remote(self, path): """Apply the defined default remote provider to the given path and return the updated _IOFile. 
diff --git a/snakemake/workflow.py b/snakemake/workflow.py index cb454f662..721fb3d24 100644 --- a/snakemake/workflow.py +++ b/snakemake/workflow.py @@ -1786,6 +1786,7 @@ def module( config=None, skip_validation=False, replace_prefix=None, + prefix=None, ): self.modules[name] = ModuleInfo( self, @@ -1795,6 +1796,7 @@ def module( config=config, skip_validation=skip_validation, replace_prefix=replace_prefix, + prefix=prefix, ) def userule(self, rules=None, from_module=None, name_modifier=None, lineno=None): diff --git a/tests/test_modules_prefix/Snakefile b/tests/test_modules_prefix/Snakefile new file mode 100644 index 000000000..daebc9d9a --- /dev/null +++ b/tests/test_modules_prefix/Snakefile @@ -0,0 +1,17 @@ +shell.executable("bash") + +configfile: "config/config.yaml" + + +module test: + snakefile: + "module-test/Snakefile" + config: + config + prefix: + "foo" + + +use rule * from test + +assert test.some_func() == 15 diff --git a/tests/test_modules_prefix/config/config.yaml b/tests/test_modules_prefix/config/config.yaml new file mode 100644 index 000000000..7ae63cb9a --- /dev/null +++ b/tests/test_modules_prefix/config/config.yaml @@ -0,0 +1 @@ +test: 1 \ No newline at end of file diff --git a/tests/test_modules_prefix/expected-results/foo/results/test.out b/tests/test_modules_prefix/expected-results/foo/results/test.out new file mode 100644 index 000000000..d00491fd7 --- /dev/null +++ b/tests/test_modules_prefix/expected-results/foo/results/test.out @@ -0,0 +1 @@ +1 diff --git a/tests/test_modules_prefix/module-test/Snakefile b/tests/test_modules_prefix/module-test/Snakefile new file mode 100644 index 000000000..7703450b6 --- /dev/null +++ b/tests/test_modules_prefix/module-test/Snakefile @@ -0,0 +1,13 @@ +configfile: "config.yaml" # does not exist, but this statement should be ignored on module import + + +def some_func(): + return 15 + + +rule a: + output: + "results/test.out", + "/tmp/foo.txt" + shell: + "echo {config[test]} > {output[0]}; touch {output[1]}" 
\ No newline at end of file diff --git a/tests/tests.py b/tests/tests.py index 57037cfe0..f1a4d3061 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1273,6 +1273,11 @@ def test_modules_all(): run(dpath("test_modules_all"), targets=["a"]) +@skip_on_windows +def test_modules_prefix(): + run(dpath("test_modules_prefix"), targets=["a"]) + + def test_modules_specific(): run(dpath("test_modules_specific"), targets=["test_a"])