From f0e8fa285437a02ca7edcf87334bf00cb347064a Mon Sep 17 00:00:00 2001 From: Till Hartmann Date: Thu, 12 Aug 2021 12:16:34 +0200 Subject: [PATCH] feat: Add support for rust scripts (enabling directly integrated ad-hoc robust high performance scripting) (#1053) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add support for rust scripts * add rust environment yaml * add missing files * some basic docs * clarify default dependencies * add functionality to handle cargo manifest * remove redundant continue * add some more rust script docs and restructure scripts docs * use NamedList type instead of HashMap * remove additional '--features', add indexmap/serde dependency+feature * update test-manifest.rs to use namedlist API aswell * add outer line doc testing and pin rust-script version * small fixes for rust outer doc test * format shell log string for rust-script * fmt * add missing test file * add missing rust script * replace serde-pickle with serde_json + json_typegen * fmt * only iter over positional items * fmt * add code to modify PATH, add functions for redirecting stdout, stderr, fmt and one stray fmt commit * use fully qualified names instead of use statements * update docs * remove print and todo * use ordered list instead * remove example TODO * move comment about R snakemake@source() function to the R section * update src comments * make log impl_iter and dont redirect rust-script stream * minor additions to the docs Co-authored-by: Johannes Köster Co-authored-by: Michael Hall --- docs/snakefiles/rules.rst | 182 +++++- snakemake/script.py | 439 +++++++++++++- tests/conftest.py | 1 - tests/test_expand.py | 26 +- tests/test_script.py | 558 ++++++++++++++++++ tests/test_script/Snakefile | 42 +- tests/test_script/envs/rust.yaml | 5 + .../expected-results/rust-manifest.out | 2 + .../expected-results/rust-outer-line-doc.out | 2 + tests/test_script/expected-results/rust.out | 1 + 
tests/test_script/scripts/test-manifest.rs | 58 ++ .../scripts/test-outer-line-doc.rs | 57 ++ tests/test_script/scripts/test.rs | 19 + tests/test_script/test2.in | 1 + 14 files changed, 1340 insertions(+), 53 deletions(-) create mode 100644 tests/test_script.py create mode 100644 tests/test_script/envs/rust.yaml create mode 100644 tests/test_script/expected-results/rust-manifest.out create mode 100644 tests/test_script/expected-results/rust-outer-line-doc.out create mode 100644 tests/test_script/expected-results/rust.out create mode 100644 tests/test_script/scripts/test-manifest.rs create mode 100644 tests/test_script/scripts/test-outer-line-doc.rs create mode 100644 tests/test_script/scripts/test.rs create mode 100644 tests/test_script/test2.in diff --git a/docs/snakefiles/rules.rst b/docs/snakefiles/rules.rst index 6dac39d38..7107cf774 100644 --- a/docs/snakefiles/rules.rst +++ b/docs/snakefiles/rules.rst @@ -581,6 +581,9 @@ External scripts A rule can also point to an external script instead of a shell command or inline Python code, e.g. +Python +~~~~~~ + .. code-block:: python rule NAME: @@ -601,29 +604,24 @@ The script path is always relative to the Snakefile containing the directive (in It is recommended to put all scripts into a subfolder ``scripts`` as above. Inside the script, you have access to an object ``snakemake`` that provides access to the same objects that are available in the ``run`` and ``shell`` directives (input, output, params, wildcards, log, threads, resources, config), e.g. you can use ``snakemake.input[0]`` to access the first input file of above rule. -Apart from Python scripts, this mechanism also allows you to integrate R_ and R Markdown_ scripts with Snakemake, e.g. - -.. _R: https://www.r-project.org -.. _Markdown: https://rmarkdown.rstudio.com +An example external Python script could look like this: .. 
code-block:: python - rule NAME: - input: - "path/to/inputfile", - "path/to/other/inputfile" - output: - "path/to/outputfile", - "path/to/another/outputfile" - script: - "scripts/script.R" + def do_something(data_path, out_path, threads, myparam): + # python code -In the R script, an S4 object named ``snakemake`` analogous to the Python case above is available and allows access to input and output files and other parameters. Here the syntax follows that of S4 classes with attributes that are R lists, e.g. we can access the first input file with ``snakemake@input[[1]]`` (note that the first file does not have index ``0`` here, because R starts counting from ``1``). Named input and output files can be accessed in the same way, by just providing the name instead of an index, e.g. ``snakemake@input[["myfile"]]``. + do_something(snakemake.input[0], snakemake.output[0], snakemake.threads, snakemake.config["myparam"]) +You can use the Python debugger from within the script if you invoke Snakemake with ``--debug``. -Alternatively, it is possible to integrate Julia_ scripts, e.g. +R and R Markdown +~~~~~~~~~~~~~~~~ -.. _Julia: https://julialang.org +Apart from Python scripts, this mechanism also allows you to integrate R_ and R Markdown_ scripts with Snakemake, e.g. + +.. _R: https://www.r-project.org +.. _Markdown: https://rmarkdown.rstudio.com .. code-block:: python @@ -635,23 +633,11 @@ Alternatively, it is possible to integrate Julia_ scripts, e.g. "path/to/outputfile", "path/to/another/outputfile" script: - "path/to/script.jl" - -In the Julia_ script, a ``snakemake`` object is available, which can be accessed similar to the Python case (see above), with the only difference that you have to index from 1 instead of 0. - -For technical reasons, scripts are executed in ``.snakemake/scripts``. The original script directory is available as ``scriptdir`` in the ``snakemake`` object. 
A convenience method, ``snakemake@source()``, acts as a wrapper for the normal R ``source()`` function, and can be used to source files relative to the original script directory. - -An example external Python script could look like this: - -.. code-block:: python - - def do_something(data_path, out_path, threads, myparam): - # python code + "scripts/script.R" - do_something(snakemake.input[0], snakemake.output[0], snakemake.threads, snakemake.config["myparam"]) +In the R script, an S4 object named ``snakemake`` analogous to the Python case above is available and allows access to input and output files and other parameters. Here the syntax follows that of S4 classes with attributes that are R lists, e.g. we can access the first input file with ``snakemake@input[[1]]`` (note that the first file does not have index ``0`` here, because R starts counting from ``1``). Named input and output files can be accessed in the same way, by just providing the name instead of an index, e.g. ``snakemake@input[["myfile"]]``. -You can use the Python debugger from within the script if you invoke Snakemake with ``--debug``. -An equivalent script written in R would look like this: +An equivalent script (:ref:`to the Python one above `) written in R would look like this: .. code-block:: r @@ -664,6 +650,7 @@ An equivalent script written in R would look like this: To debug R scripts, you can save the workspace with ``save.image()``, and invoke R after Snakemake has terminated. Then you can use the usual R debugging facilities while having access to the ``snakemake`` variable. It is best practice to wrap the actual code into a separate function. This increases the portability if the code shall be invoked outside of Snakemake or from a different rule. +A convenience method, ``snakemake@source()``, acts as a wrapper for the normal R ``source()`` function, and can be used to source files relative to the original script directory. 
An R Markdown file can be integrated in the same way as R and Python scripts, but only a single output (html) file can be used: @@ -713,6 +700,139 @@ In the R Markdown file you can insert output from a R command, and access variab A link to the R Markdown document with the snakemake object can be inserted. Therefore a variable called ``rmd`` needs to be added to the ``params`` section in the header of the ``report.Rmd`` file. The generated R Markdown file with snakemake object will be saved in the file specified in this ``rmd`` variable. This file can be embedded into the HTML document using base64 encoding and a link can be inserted as shown in the example above. Also other input and output files can be embedded in this way to make a portable report. Note that the above method with a data URI only works for small files. An experimental technology to embed larger files is using Javascript Blob `object `_. +Julia_ +~~~~~~ + +.. _Julia: https://julialang.org + +.. code-block:: python + + rule NAME: + input: + "path/to/inputfile", + "path/to/other/inputfile" + output: + "path/to/outputfile", + "path/to/another/outputfile" + script: + "path/to/script.jl" + +In the Julia_ script, a ``snakemake`` object is available, which can be accessed similar to the :ref:`Python case `, with the only difference that you have to index from 1 instead of 0. + +Rust_ +~~~~~ + +.. _Rust: https://www.rust-lang.org/ + +.. code-block:: python + + rule NAME: + input: + "path/to/inputfile", + "path/to/other/inputfile", + named_input="path/to/named/inputfile", + output: + "path/to/outputfile", + "path/to/another/outputfile" + params: + seed=4 + log: + stdout="path/to/stdout.log", + stderr="path/to/stderr.log", + script: + "path/to/script.rs" + +The ability to execute Rust scripts is facilitated by |rust-script|_. As such, the +script must be a valid ``rust-script`` script and ``rust-script`` must be available in the +environment the rule is run in. 
+ +Some example scripts can be found in the +`tests directory `_. + +In the Rust script, a ``snakemake`` instance is available, which is automatically generated from the python snakemake object using |json_typegen|_. +It usually looks like this: + +.. code-block:: rust + + pub struct Snakemake { + input: Input, + output: Output, + params: Params, + wildcards: Wildcards, + threads: u64, + log: Log, + resources: Resources, + config: Config, + rulename: String, + bench_iteration: Option, + scriptdir: String, + } + +Any named parameter is translated to a corresponding ``field_name: Type``, such that ``params.seed`` from the example above can be accessed just like in python, i.e.: + +.. code-block:: rust + + let seed = snakemake.params.seed; + assert_eq!(seed, 4); + +Positional arguments for ``input``, ``output``, ``log`` and ``wildcards`` can be accessed by index and iterated over: + +.. code-block:: rust + + let input = &snakemake.input; + + // Input implements Index + let inputfile = &input[0]; + assert_eq!(inputfile, "path/to/inputfile"); + + // Input implements IntoIterator + // + // prints + // > 'path/to/inputfile' + // > 'path/to/other/inputfile' + for f in input { + println!("> '{}'", &f); + } + + +It is also possible to redirect ``stdout`` and ``stderr``: + +.. code-block:: rust + + println!("This will NOT be written to path/to/stdout.log"); + // redirect stdout to "path/to/stdout.log" + let _stdout_redirect = snakemake.redirect_stdout(&snakemake.log.stdout)?; + println!("This will be written to path/to/stdout.log"); + + // redirect stderr to "path/to/stderr.log" + let _stderr_redirect = snakemake.redirect_stderr(&snakemake.log.stderr)?; + eprintln!("This will be written to path/to/stderr.log"); + drop(_stderr_redirect); + eprintln!("This will NOT be written to path/to/stderr.log"); + +Redirection of stdout/stderr is only "active" as long as the returned ``Redirect`` instance is alive; in order to stop redirecting, drop the respective instance.
+ +In order to work, rust-script support for snakemake has some dependencies enabled by default: + +#. ``anyhow=1``, for its ``Result`` type +#. ``gag=1``, to enable stdout/stderr redirects +#. ``json_typegen=0.6``, for generating rust structs from a json representation of the snakemake object +#. ``lazy_static=1.4``, to make a ``snakemake`` instance easily accessible +#. ``serde=1``, explicit dependency of ``json_typegen`` +#. ``serde_derive=1``, explicit dependency of ``json_typegen`` +#. ``serde_json=1``, explicit dependency of ``json_typegen`` + +If your script uses any of these packages, you do not need to ``use`` them in your script. Trying to ``use`` them will cause a compilation error. + +.. |rust-script| replace:: ``rust-script`` +.. _rust-script: https://rust-script.org/ +.. |json_typegen| replace:: ``json_typegen`` +.. _json_typegen: https://github.com/evestera/json_typegen + +---- + +For technical reasons, scripts are executed in ``.snakemake/scripts``. The original script directory is available as ``scriptdir`` in the ``snakemake`` object. + .. 
_snakefiles_notebook-integration: Jupyter notebook integration diff --git a/snakemake/script.py b/snakemake/script.py index 7f9ff1440..99b318796 100644 --- a/snakemake/script.py +++ b/snakemake/script.py @@ -14,6 +14,8 @@ import collections import re from abc import ABC, abstractmethod +from pathlib import Path +from typing import Tuple, Pattern, Union, Optional from urllib.request import urlopen, pathname2url from urllib.error import URLError @@ -31,9 +33,9 @@ from snakemake.io import git_content, split_git_path from snakemake.deployment import singularity - # TODO use this to find the right place for inserting the preamble PY_PREAMBLE_RE = re.compile(r"from( )+__future__( )+import.*?(?P[;\n])") +PathLike = Union[str, Path, os.PathLike] class Snakemake: @@ -102,17 +104,62 @@ def log_fmt_shell(self, stdout=True, stderr=True, append=False): any any any None "" -------- -------- -------- ----- ----------- """ - if not self.log: - return "" - lookup = { - (True, True, True): " >> {0} 2>&1", - (True, False, True): " >> {0}", - (False, True, True): " 2>> {0}", - (True, True, False): " > {0} 2>&1", - (True, False, False): " > {0}", - (False, True, False): " 2> {0}", - } - return lookup[(stdout, stderr, append)].format(self.log) + return _log_shell_redirect(self.log, stdout, stderr, append) + + +def _log_shell_redirect( + log: Optional[PathLike], + stdout: bool = True, + stderr: bool = True, + append: bool = False, +) -> str: + """ + Return a shell redirection string to be used in `shell()` calls + + This function allows scripts and wrappers support optional `log` files + specified in the calling rule. If no `log` was specified, then an + empty string "" is returned, regardless of the values of `stdout`, + `stderr`, and `append`. + + Parameters + --------- + + stdout : bool + Send stdout to log + + stderr : bool + Send stderr to log + + append : bool + Do not overwrite the log file. Useful for sending output of + multiple commands to the same log. 
Note however that the log will + not be truncated at the start. + + The following table describes the output: + + -------- -------- -------- ----- ------------- + stdout stderr append log return value + -------- -------- -------- ----- ------------ + True True True fn >> fn 2>&1 + True False True fn >> fn + False True True fn 2>> fn + True True False fn > fn 2>&1 + True False False fn > fn + False True False fn 2> fn + any any any None "" + -------- -------- -------- ----- ----------- + """ + if not log: + return "" + lookup = { + (True, True, True): " >> {0} 2>&1", + (True, False, True): " >> {0}", + (False, True, True): " 2>> {0}", + (True, True, False): " > {0} 2>&1", + (True, False, False): " > {0}", + (False, True, False): " 2> {0}", + } + return lookup[(stdout, stderr, append)].format(str(log)) class REncoder: @@ -823,6 +870,371 @@ def execute_script(self, fname, edit=False): self._execute_cmd("julia {fname:q}", fname=fname) +class RustScript(ScriptBase): + @staticmethod + def generate_preamble( + path, + source, + basedir, + input_, + output, + params, + wildcards, + threads, + resources, + log, + config, + rulename, + conda_env, + container_img, + singularity_args, + env_modules, + bench_record, + jobid, + bench_iteration, + cleanup_scripts, + shadow_dir, + preamble_addendum="", + ): + wrapper_path = path[7:] if path.startswith("file://") else path + + # snakemake's namedlists will be encoded as a dict + # which stores the not-named items at the key "positional" + # and unpacks named items into the dict + def encode_namedlist(values): + values = list(values) + if len(values) == 0: + return dict(positional=[]) + positional = [val for key, val in values if not key] + return dict( + positional=positional, **{key: val for key, val in values if key} + ) + + snakemake = dict( + input=encode_namedlist(input_._plainstrings()._allitems()), + output=encode_namedlist(output._plainstrings()._allitems()), + params=encode_namedlist(params.items()), + 
wildcards=encode_namedlist(wildcards.items()), + threads=threads, + resources=encode_namedlist( + { + name: value + for (name, value) in resources.items() + if name != "_cores" and name != "_nodes" + }.items() + ), + log=encode_namedlist(log._plainstrings()._allitems()), + config=encode_namedlist(config.items()), + rulename=rulename, + bench_iteration=bench_iteration, + scriptdir=os.path.dirname(wrapper_path), + ) + + import json + + json_string = json.dumps(dict(snakemake)) + + # Obtain search path for current snakemake module. + # We append it at the end (as a fallback). + searchpath = SNAKEMAKE_SEARCHPATH + if container_img is not None: + searchpath = singularity.SNAKEMAKE_MOUNTPOINT + searchpath = repr(searchpath) + # For local scripts, add their location to the path in case they use path-based imports + if path.startswith("file://"): + searchpath += ", " + repr(os.path.dirname(path[7:])) + + return textwrap.dedent( + """ + json_typegen::json_typegen!("Snakemake", r###"{json_string}"###, {{ + "/bench_iteration": {{ + "use_type": "Option" + }}, + "/input/positional": {{ + "use_type": "Vec" + }}, + "/output/positional": {{ + "use_type": "Vec" + }}, + "/log/positional": {{ + "use_type": "Vec" + }}, + "/wildcards/positional": {{ + "use_type": "Vec" + }}, + }}); + + pub struct Iter<'a, T>(std::slice::Iter<'a, T>); + impl<'a, T> Iterator for Iter<'a, T> {{ + type Item = &'a T; + + fn next(&mut self) -> Option {{ + self.0.next() + }} + }} + macro_rules! impl_iter {{ + ($($s:ty),+) => {{ + $( + impl IntoIterator for $s {{ + type Item = String; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter {{ + self.positional.into_iter() + }} + }} + + impl<'a> IntoIterator for &'a $s {{ + type Item = &'a String; + type IntoIter = Iter<'a, String>; + + fn into_iter(self) -> Self::IntoIter {{ + Iter(self.positional.as_slice().into_iter()) + }} + }} + )+ + }}; + }} + + macro_rules! 
impl_index {{ + ($($s:ty),+) => {{ + $( + impl std::ops::Index for $s {{ + type Output = String; + + fn index(&self, index: usize) -> &Self::Output {{ + &self.positional[index] + }} + }} + )+ + }} + }} + + + impl_iter!(Input, Output, Wildcards, Log); + impl_index!(Input, Output, Wildcards, Log); + + impl Snakemake {{ + #[allow(dead_code)] + fn redirect_stderr>( + &self, + path: P, + ) -> anyhow::Result> {{ + let log = std::fs::OpenOptions::new() + .truncate(true) + .read(true) + .create(true) + .write(true) + .open(path)?; + Ok(gag::Redirect::stderr(log)?) + }} + + #[allow(dead_code)] + fn redirect_stdout>( + &self, + path: P, + ) -> anyhow::Result> {{ + let log = std::fs::OpenOptions::new() + .truncate(true) + .read(true) + .create(true) + .write(true) + .open(path)?; + Ok(gag::Redirect::stdout(log)?) + }} + + fn setup_path(&self) -> anyhow::Result<()> {{ + use std::env; + if let Some(path) = env::var_os("PATH") {{ + let mut paths = env::split_paths(&path).collect::>(); + paths.push(std::path::PathBuf::from("{searchpath}")); + let new_path = env::join_paths(paths)?; + env::set_var("PATH", &new_path); + }} + Ok(()) + }} + }} + + lazy_static::lazy_static! 
{{ + // https://github.com/rust-lang-nursery/lazy-static.rs/issues/153 + #[allow(non_upper_case_globals)] + static ref snakemake: Snakemake = {{ + let s: Snakemake = serde_json::from_str(r###"{json_string}"###).expect("Failed parsing snakemake JSON"); + s.setup_path().expect("Failed setting PATH"); + s + }}; + }} + // TODO include addendum, if any {{preamble_addendum}} + """ + ).format( + searchpath=searchpath, + json_string=json_string, + preamble_addendum=preamble_addendum, + ) + + def get_preamble(self): + wrapper_path = self.path[7:] if self.path.startswith("file://") else self.path + # preamble_addendum = ( + # "__real_file__ = __file__; __file__ = {file_override};".format( + # file_override=repr(os.path.realpath(wrapper_path)) + # ) + # ) + preamble_addendum = "" + + preamble = RustScript.generate_preamble( + self.path, + self.source, + self.basedir, + self.input, + self.output, + self.params, + self.wildcards, + self.threads, + self.resources, + self.log, + self.config, + self.rulename, + self.conda_env, + self.container_img, + self.singularity_args, + self.env_modules, + self.bench_record, + self.jobid, + self.bench_iteration, + self.cleanup_scripts, + self.shadow_dir, + preamble_addendum=preamble_addendum, + ) + return preamble + + def write_script(self, preamble, fd): + content = self.combine_preamble_and_source(preamble) + fd.write(content.encode()) + + def execute_script(self, fname, edit=False): + deps = self.default_dependencies() + ftrs = self.default_features() + self._execute_cmd( + "rust-script -d {deps} --features {ftrs} {fname:q} ", + fname=fname, + deps=deps, + ftrs=ftrs, + ) + + def combine_preamble_and_source(self, preamble: str) -> str: + """The manifest info needs to be moved to before the preamble. + Also, because rust-scipt relies on inner docs, there can't be an empty line + between the manifest and preamble. 
+ """ + manifest, src = RustScript.extract_manifest(self.source.decode()) + return manifest + preamble.lstrip("\r\n") + src + + @staticmethod + def default_dependencies() -> str: + return " -d ".join( + [ + "anyhow=1", + "serde_json=1", + "serde=1", + "serde_derive=1", + "lazy_static=1.4", + "json_typegen=0.6", + "gag=1", + ] + ) + + @staticmethod + def default_features() -> str: + return ",".join(["serde/derive"]) + + @staticmethod + def extract_manifest(source: str) -> Tuple[str, str]: + # we have no need for the shebang for now given the way we run the script + _, src = RustScript._strip_shebang(source) + manifest, src = RustScript._strip_manifest(src) + + return manifest, src + + @staticmethod + def _strip_shebang(src: str) -> Tuple[str, str]: + """From https://github.com/fornwall/rust-script/blob/ce508bad02a11d574657d2f1debf7e73fca2bf6e/src/manifest.rs#L312-L320""" + rgx = re.compile(r"^#![^\[].*?(\r\n|\n)") + return strip_re(rgx, src) + + @staticmethod + def _strip_manifest(src: str) -> Tuple[str, str]: + """From https://github.com/fornwall/rust-script/blob/ce508bad02a11d574657d2f1debf7e73fca2bf6e/src/manifest.rs#L405-L411""" + manifest, remainder = RustScript._strip_single_line_manifest(src) + if not manifest: + manifest, remainder = RustScript._strip_code_block_manifest(src) + return manifest, remainder + + @staticmethod + def _strip_single_line_manifest(src: str) -> Tuple[str, str]: + """From https://github.com/fornwall/rust-script/blob/ce508bad02a11d574657d2f1debf7e73fca2bf6e/src/manifest.rs#L618-L632""" + rgx = re.compile(r"^\s*//\s*cargo-deps\s*:(.*?)(\r\n|\n)", flags=re.IGNORECASE) + return strip_re(rgx, src) + + @staticmethod + def _strip_code_block_manifest(src: str) -> Tuple[str, str]: + """From https://github.com/fornwall/rust-script/blob/ce508bad02a11d574657d2f1debf7e73fca2bf6e/src/manifest.rs#L634-L664 + We need to find the first `/*!` or `//!` that *isn't* preceeded by something + that would make it apply to anything other than the crate itself. 
Because we + can't do this accurately, we'll just require that the doc comment is the + *first* thing in the file (after the optional shebang, which should already + have been stripped). + """ + crate_comment_re = re.compile( + r"^\s*(/\*!|//([!/]))(.*?)(\r\n|\n)", flags=re.MULTILINE + ) + # does src start with a crate comment? + match = crate_comment_re.match(src) + if not match: + return "", src + end_of_comment = match.end() + # find end of crate comment + while match is not None: + end_of_comment = match.end() + match = crate_comment_re.match(src, pos=end_of_comment) + + crate_comment = src[:end_of_comment] + found_code_block_open = False + code_block_open_re = re.compile(r"```\s*cargo") + found_code_block_close = False + code_block_close_re = re.compile(r"```") + for line in crate_comment.splitlines(): + if not found_code_block_open: + m = code_block_open_re.search(line) + if m: + found_code_block_open = True + else: + m = code_block_close_re.search(line) + if m: + found_code_block_close = True + break + + crate_comment_has_manifest = found_code_block_open and found_code_block_close + if crate_comment_has_manifest: + return crate_comment, src[end_of_comment:] + else: + return "", src + + +def strip_re(regex: Pattern, s: str) -> Tuple[str, str]: + """Strip a substring matching a regex from a string and return the stripped part + and the remainder of the original string. 
+ Returns an empty string and the original string if the regex is not found + """ + rgx = re.compile(regex) + match = rgx.search(s) + if match: + head, tail = s[: match.end()], s[match.end() :] + else: + head, tail = "", s + + return head, tail + + def get_source(path, basedir=".", wildcards=None, params=None): source = None if not path.startswith("http") and not path.startswith("git+file"): @@ -869,6 +1281,8 @@ def get_language(path, source): language = "rmarkdown" elif path.endswith(".jl"): language = "julia" + elif path.endswith(".rs"): + language = "rust" # detect kernel language for Jupyter Notebooks if language == "jupyter": @@ -919,6 +1333,7 @@ def script( "r": RScript, "rmarkdown": RMarkdown, "julia": JuliaScript, + "rust": RustScript, }.get(language, None) if exec_class is None: raise ValueError( diff --git a/tests/conftest.py b/tests/conftest.py index 8e299512d..9e23d4eab 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,4 +30,3 @@ def prepend_usable_bash_to_path(monkeypatch): @pytest.fixture(autouse=True) def reset_shell_exec_on_windows(prepend_usable_bash_to_path): shell.executable(None) - diff --git a/tests/test_expand.py b/tests/test_expand.py index 8094a38ba..daba72ea4 100644 --- a/tests/test_expand.py +++ b/tests/test_expand.py @@ -56,12 +56,22 @@ def test_allow_missing(): a="1 2".split(), b="3 4".split(), allow_missing=True, - ) == ["1_3_{C}.ab", "1_4_{C}.ab", "2_3_{C}.ab", "2_4_{C}.ab", "3_{c}.b", "4_{c}.b"] + ) == [ + "1_3_{C}.ab", + "1_4_{C}.ab", + "2_3_{C}.ab", + "2_4_{C}.ab", + "3_{c}.b", + "4_{c}.b", + ] # replace product - assert expand( - ["{a}_{b}_{C}.ab", "{b}_{c}.b"], - zip, - a="1 2".split(), - b="3 4".split(), - allow_missing=True, - ) == ["1_3_{C}.ab", "2_4_{C}.ab", "3_{c}.b", "4_{c}.b"] + assert ( + expand( + ["{a}_{b}_{C}.ab", "{b}_{c}.b"], + zip, + a="1 2".split(), + b="3 4".split(), + allow_missing=True, + ) + == ["1_3_{C}.ab", "2_4_{C}.ab", "3_{c}.b", "4_{c}.b"] + ) diff --git a/tests/test_script.py 
b/tests/test_script.py new file mode 100644 index 000000000..93103f1aa --- /dev/null +++ b/tests/test_script.py @@ -0,0 +1,558 @@ +from textwrap import dedent + +from snakemake.script import RustScript + + +class TestRustScriptExtractManifest: + def test_single_line_manifest_with_shebang_and_second_manifest(self): + source = dedent( + """#!/usr/bin/env rust-script +// cargo-deps: time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = '// cargo-deps: time="0.1.25", serde="*"\n' + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + assert remaining_src == expected_remaining_src + + def test_single_line_manifest_not_at_start_with_shebang(self): + source = dedent( + """#!/usr/bin/env rust-script +// this is where cargo-deps should be +// cargo-deps: time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. 
+// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = "" + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """// this is where cargo-deps should be +// cargo-deps: time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + assert remaining_src == expected_remaining_src + + def test_single_line_manifest_not_at_start_without_shebang(self): + source = dedent( + """// this is where cargo-deps should be +// cargo-deps: time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = "" + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """// this is where cargo-deps should be +// cargo-deps: time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. 
+// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + assert remaining_src == expected_remaining_src + + def test_single_line_manifest_with_empty_line_without_shebang(self): + source = dedent( + """ +// cargo-deps: time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = '\n// cargo-deps: time="0.1.25", serde="*"\n' + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + assert remaining_src == expected_remaining_src + + def test_single_line_manifest_is_case_insensitive(self): + source = dedent( + """ +// Cargo-deps: time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. 
+// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = '\n// Cargo-deps: time="0.1.25", serde="*"\n' + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + assert remaining_src == expected_remaining_src + + def test_single_line_manifest_spacing_has_no_impact(self): + source = dedent( + """ +// cargo-deps : time="0.1.25", serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = '\n// cargo-deps : time="0.1.25", serde="*"\n' + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. 
+// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + assert remaining_src == expected_remaining_src + + def test_single_line_manifest_formatting_not_touched_even_if_wrong(self): + """The dependency delimiter is wrong, but we let rust-script deal with it""" + source = dedent( + """ +// cargo-deps: time="0.1.25"; serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = '\n// cargo-deps: time="0.1.25"; serde="*"\n' + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. +// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + assert remaining_src == expected_remaining_src + + def test_single_line_manifest_spelt_wrong(self): + source = dedent( + """ +// cargo-dependencies: time="0.1.25"; serde="*" +// You can also leave off the version number, in which case, it's assumed +// to be "*". Also, the `cargo-deps` comment *must* be a single-line +// comment, and it *must* be the first thing in the file, after the +// shebang. 
+// This second dependency line should be ignored +// cargo-deps: time="0.1.25", libc="0.2.5" +fn main() { + println!("{}", time::now().rfc822z()); +} +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = "" + + assert manifest == expected_manifest + + expected_remaining_src = source + + assert remaining_src == expected_remaining_src + + def test_code_block_manifest_with_shebang(self): + source = dedent( + """#!/usr/bin/env rust-script +//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ```cargo +//! [dependencies] +//! time = "0.1.25" +//! ``` +fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = dedent( + """//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ```cargo +//! [dependencies] +//! time = "0.1.25" +//! ``` +""" + ) + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + assert remaining_src == expected_remaining_src + + def test_code_block_manifest_without_shebang(self): + source = dedent( + """ +//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ```cargo +//! [dependencies] +//! time = "0.1.25" +//! ``` +fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = dedent( + """\n//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! 
```cargo +//! [dependencies] +//! time = "0.1.25" +//! ``` +""" + ) + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + assert remaining_src == expected_remaining_src + + def test_code_block_manifest_spacing_around_language(self): + source = dedent( + """ +//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ``` cargo +//! [dependencies] +//! time = "0.1.25" +//! ``` +fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = dedent( + """\n//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ``` cargo +//! [dependencies] +//! time = "0.1.25" +//! ``` +""" + ) + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + assert remaining_src == expected_remaining_src + + def test_code_block_manifest_has_non_cargo_block(self): + source = dedent( + """ +//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ```rust +//! [dependencies] +//! time = "0.1.25" +//! ``` +fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = "" + + assert manifest == expected_manifest + + expected_remaining_src = source + + assert remaining_src == expected_remaining_src + + def test_code_block_manifest_missing_closing_fence(self): + source = dedent( + """ +//! This is a regular crate doc comment, but it also contains a partial +//! 
Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ```cargo +//! [dependencies] +//! time = "0.1.25" +//! +fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = "" + + assert manifest == expected_manifest + + expected_remaining_src = source + + assert remaining_src == expected_remaining_src + + def test_code_block_manifest_not_in_first_comment_block(self): + source = dedent( + """//! crate comment +static FOO: &str = "foo"; +//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ```cargo +//! [dependencies] +//! time = "0.1.25" +//! ``` +//! +fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = "" + + assert manifest == expected_manifest + + expected_remaining_src = source + + assert remaining_src == expected_remaining_src + + def test_code_block_manifest_with_outer_line_doc_comment(self): + source = dedent( + """#!/usr/bin/env rust-script +/// This is a regular crate doc comment, but it also contains a partial +/// Cargo manifest. Note the use of a *fenced* code block, and the +/// `cargo` "language". +/// +/// ```cargo +/// [dependencies] +/// time = "0.1.25" +/// ``` +fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + manifest, remaining_src = RustScript.extract_manifest(source) + + expected_manifest = dedent( + """/// This is a regular crate doc comment, but it also contains a partial +/// Cargo manifest. Note the use of a *fenced* code block, and the +/// `cargo` "language". 
+/// +/// ```cargo +/// [dependencies] +/// time = "0.1.25" +/// ``` +""" + ) + + assert manifest == expected_manifest + + expected_remaining_src = dedent( + """fn main() { + println!("{}", time::now().rfc822z()); +} + +""" + ) + + assert remaining_src == expected_remaining_src diff --git a/tests/test_script/Snakefile b/tests/test_script/Snakefile index 1ce5cb17c..8d767f773 100644 --- a/tests/test_script/Snakefile +++ b/tests/test_script/Snakefile @@ -6,7 +6,10 @@ rule all: "test.out", "test.html", "rel_source.out", - "julia.out" + "julia.out", + "rust.out", + "rust-manifest.out", + "rust-outer-line-doc.out", rule: input: @@ -56,3 +59,40 @@ rule: "envs/julia.yaml" script: "scripts/test.jl" + +rule: + input: + "test.in", + "test2.in", + ["test.in", "test2.in"], + named_input="test.in", + output: + "rust.out", + params: + integer=123 + conda: + "envs/rust.yaml" + script: + "scripts/test.rs" + +rule: + output: + "rust-manifest.out", + params: + keep="-" + conda: + "envs/rust.yaml" + log: + "rust-manifest.log" + script: + "scripts/test-manifest.rs" + +rule: + output: + "rust-outer-line-doc.out", + params: + keep="-", + conda: + "envs/rust.yaml" + script: + "scripts/test-outer-line-doc.rs" diff --git a/tests/test_script/envs/rust.yaml b/tests/test_script/envs/rust.yaml new file mode 100644 index 000000000..1cb4c5bb4 --- /dev/null +++ b/tests/test_script/envs/rust.yaml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - rust-script>=0.15.0 diff --git a/tests/test_script/expected-results/rust-manifest.out b/tests/test_script/expected-results/rust-manifest.out new file mode 100644 index 000000000..52ca3764d --- /dev/null +++ b/tests/test_script/expected-results/rust-manifest.out @@ -0,0 +1,2 @@ +chrom1 14 +chrom2 41 diff --git a/tests/test_script/expected-results/rust-outer-line-doc.out b/tests/test_script/expected-results/rust-outer-line-doc.out new file mode 100644 index 000000000..52ca3764d --- /dev/null +++ 
b/tests/test_script/expected-results/rust-outer-line-doc.out @@ -0,0 +1,2 @@ +chrom1 14 +chrom2 41 diff --git a/tests/test_script/expected-results/rust.out b/tests/test_script/expected-results/rust.out new file mode 100644 index 000000000..1cc480467 --- /dev/null +++ b/tests/test_script/expected-results/rust.out @@ -0,0 +1 @@ +Rust test succeded! diff --git a/tests/test_script/scripts/test-manifest.rs b/tests/test_script/scripts/test-manifest.rs new file mode 100644 index 000000000..c94053488 --- /dev/null +++ b/tests/test_script/scripts/test-manifest.rs @@ -0,0 +1,58 @@ +//! This is a regular crate doc comment, but it also contains a partial +//! Cargo manifest. Note the use of a *fenced* code block, and the +//! `cargo` "language". +//! +//! ```cargo +//! [dependencies] +//! csv = "1.1" +//! serde = { version = "1.0", features = ["derive"] } +//! ``` + + +use std::error::Error; +use std::io::{BufWriter, Write}; +use std::fs::File; +use serde::Deserialize; + + +static BED: &[u8] = b"chrom1 1 15 foo 454 - +chrom1 40 45 bar 2 + +chrom2 4 45 baz 2 - +"; + +#[derive(Debug, Deserialize)] +struct BedRecord { + chrom: String, + start: u64, + end: u64, + name: Option<String>, + score: Option<u16>, + strand: Option<char>, +} + +fn main() -> Result<(), Box<dyn Error>> { + snakemake.redirect_stderr(&snakemake.log[0])?; + let f_out = File::create(&snakemake.output[0])?; + + let mut ostream = BufWriter::new(f_out); + println!("Loaded"); + + let keep_strand = match &snakemake.params.keep { + s if s.len() == 1 => Some(s.chars().next().unwrap() as char), + _ => None, + }; + + println!("Reading BED file..."); + let mut rdr = csv::ReaderBuilder::new().has_headers(false).delimiter(b'\t').from_reader(BED); + for result in rdr.deserialize() { + // Notice that we need to provide a type hint for automatic + // deserialization.
+ let record: BedRecord = result?; + let l = record.end - record.start; + if record.strand == keep_strand { + write!(&mut ostream, "{}\t{}\n", record.chrom, l)?; + } + } + println!("Output written to {}", &snakemake.output[0]); + Ok(()) +} diff --git a/tests/test_script/scripts/test-outer-line-doc.rs b/tests/test_script/scripts/test-outer-line-doc.rs new file mode 100644 index 000000000..41bbb9d0e --- /dev/null +++ b/tests/test_script/scripts/test-outer-line-doc.rs @@ -0,0 +1,57 @@ +/// This is a regular outer line doc comment, but it also contains a partial +/// Cargo manifest. Note the use of a *fenced* code block, and the +/// `cargo` "language". +/// +/// ```cargo +/// [dependencies] +/// csv = "1.1" +/// serde = { version = "1.0", features = ["derive"] } +/// ``` + + +use std::error::Error; +use std::io::{BufWriter, Write}; +use std::fs::File; +use serde::Deserialize; + + +static BED: &[u8] = b"chrom1 1 15 foo 454 - +chrom1 40 45 bar 2 + +chrom2 4 45 baz 2 - +"; + +#[derive(Debug, Deserialize)] +struct BedRecord { + chrom: String, + start: u64, + end: u64, + name: Option<String>, + score: Option<u16>, + strand: Option<char>, +} + +fn main() -> Result<(), Box<dyn Error>> { + let f_out = File::create(&snakemake.output[0])?; + + let mut ostream = BufWriter::new(f_out); + println!("Loaded"); + + let keep_strand = match &snakemake.params.keep { + s if s.len() == 1 => Some(s.chars().next().unwrap() as char), + _ => None, + }; + + println!("Reading BED file..."); + let mut rdr = csv::ReaderBuilder::new().has_headers(false).delimiter(b'\t').from_reader(BED); + for result in rdr.deserialize() { + // Notice that we need to provide a type hint for automatic + // deserialization.
+ let record: BedRecord = result?; + let l = record.end - record.start; + if record.strand == keep_strand { + write!(&mut ostream, "{}\t{}\n", record.chrom, l)?; + } + } + println!("Output written to {}", &snakemake.output[0]); + Ok(()) +} diff --git a/tests/test_script/scripts/test.rs b/tests/test_script/scripts/test.rs new file mode 100644 index 000000000..040657c28 --- /dev/null +++ b/tests/test_script/scripts/test.rs @@ -0,0 +1,19 @@ +use std::io::Write; +println!("Rust script executing!"); + +assert_eq!(snakemake.config.test, true); +assert_eq!(snakemake.params.integer, 123); +assert_eq!(snakemake.output[0], "rust.out"); +assert_eq!(snakemake.input[0], "test.in"); +assert_eq!(snakemake.input.named_input, "test.in"); +for (idx, val) in (&snakemake.input).into_iter().enumerate() { + dbg!(idx, &val); +} + +let input = &snakemake.input; +for value in input { + dbg!(value); +} + +let mut f = std::fs::File::create(&snakemake.output[0])?; +write!(&mut f, "Rust test succeded!")?; diff --git a/tests/test_script/test2.in b/tests/test_script/test2.in new file mode 100644 index 000000000..b85905ec0 --- /dev/null +++ b/tests/test_script/test2.in @@ -0,0 +1 @@ +1 2 3