From d2793223f914790c07b25363cb9b314ef166cb3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Mon, 29 Nov 2021 10:16:39 +0100 Subject: [PATCH] feat: add flag --draft-notebook for generating a skeleton notebook for manual editing (e.g. in VSCode). (#1284) * working on draft implementation * test case and fixes * update docs --- docs/snakefiles/rules.rst | 13 ++++ snakemake/__init__.py | 18 +++++- snakemake/executors/__init__.py | 2 +- snakemake/jobs.py | 4 ++ snakemake/notebook.py | 61 ++++++++++++++++--- tests/test_jupyter_notebook_draft/Snakefile | 46 ++++++++++++++ tests/test_jupyter_notebook_draft/data.txt | 1 + tests/test_jupyter_notebook_draft/env.yaml | 7 +++ .../expected-results/Notebook.py.ipynb | 38 ++++++++++++ tests/tests.py | 12 ++++ 10 files changed, 191 insertions(+), 11 deletions(-) create mode 100644 tests/test_jupyter_notebook_draft/Snakefile create mode 100644 tests/test_jupyter_notebook_draft/data.txt create mode 100644 tests/test_jupyter_notebook_draft/env.yaml create mode 100644 tests/test_jupyter_notebook_draft/expected-results/Notebook.py.ipynb diff --git a/docs/snakefiles/rules.rst b/docs/snakefiles/rules.rst index 0086a694a..6ee4333e7 100644 --- a/docs/snakefiles/rules.rst +++ b/docs/snakefiles/rules.rst @@ -945,6 +945,19 @@ with The last dependency is advisable in order to enable autoformatting of notebook cells when editing. When using other languages than Python in the notebook, one needs to additionally add the respective kernel, e.g. ``r-irkernel`` for R support. +When using an IDE with built-in Jupyter support, an alternative to ``--edit-notebook`` is ``--draft-notebook``. +Instead of firing up a notebook server, ``--draft-notebook`` just creates a skeleton notebook for editing within the IDE. +In addition, it prints instructions for configuring the IDE's notebook environment to use the interpreter from the +Conda environment defined in the corresponding rule. +For example, running + +.. 
code-block:: console + + snakemake --cores 1 --draft-notebook test.txt --use-conda + +will generate skeleton code in ``notebooks/hello.py.ipynb`` and additionally print instructions on how to open and execute the notebook in VSCode. + + Protected and Temporary Files ----------------------------- diff --git a/snakemake/__init__.py b/snakemake/__init__.py index 7c08e4d98..4ceb05bab 100644 --- a/snakemake/__init__.py +++ b/snakemake/__init__.py @@ -299,7 +299,7 @@ def snakemake( export_cwl (str): Compile workflow to CWL and save to given file log_handler (function): redirect snakemake output to this custom log handler, a function that takes a log message dictionary (see below) as its only argument (default None). The log message dictionary for the log handler has to following entries: keep_incomplete (bool): keep incomplete output files of failed jobs - edit_notebook (object): "notebook.Listen" object to configuring notebook server for interactive editing of a rule notebook. If None, do not edit. + edit_notebook (object): "notebook.EditMode" object for configuring the notebook server for interactive editing of a rule notebook. If None, do not edit. scheduler (str): Select scheduling algorithm (default ilp) scheduler_ilp_solver (str): Set solver for ilp scheduler. overwrite_groups (dict): Rule to group assignments (default None) @@ -1489,6 +1489,13 @@ def get_argument_parser(profile=None): group_notebooks = parser.add_argument_group("NOTEBOOKS") + group_notebooks.add_argument( + "--draft-notebook", + metavar="TARGET", + help="Draft a skeleton notebook for the rule used to generate the given target file. This notebook " + "can then be opened in a jupyter server, executed and implemented until ready. 
After saving, it " + "will automatically be reused in non-interactive mode by Snakemake for subsequent jobs.", + ) group_notebooks.add_argument( "--edit-notebook", metavar="TARGET", @@ -2721,12 +2728,17 @@ def open_browser(): ) log_handler.append(wms_logger.log_handler) - if args.edit_notebook: + if args.draft_notebook: + from snakemake import notebook + + args.target = [args.draft_notebook] + args.edit_notebook = notebook.EditMode(draft_only=True) + elif args.edit_notebook: from snakemake import notebook args.target = [args.edit_notebook] args.force = True - args.edit_notebook = notebook.Listen(args.notebook_listen) + args.edit_notebook = notebook.EditMode(args.notebook_listen) aggregated_wait_for_files = args.wait_for_files if args.wait_for_files_file is not None: diff --git a/snakemake/executors/__init__.py b/snakemake/executors/__init__.py index 0f42fba3d..7674b9d5c 100644 --- a/snakemake/executors/__init__.py +++ b/snakemake/executors/__init__.py @@ -512,7 +512,7 @@ def job_args_and_prepare(self, job): self.workflow.cleanup_scripts, job.shadow_dir, job.jobid, - self.workflow.edit_notebook, + self.workflow.edit_notebook if self.dag.is_edit_notebook_job(job) else None, self.workflow.conda_base_path, job.rule.basedir, self.workflow.sourcecache.runtime_cache_path, diff --git a/snakemake/jobs.py b/snakemake/jobs.py index 3840dff5e..ae4558bd4 100644 --- a/snakemake/jobs.py +++ b/snakemake/jobs.py @@ -1004,6 +1004,10 @@ def postprocess( latency_wait=None, keep_metadata=True, ): + if self.dag.is_edit_notebook_job(self): + # No postprocessing necessary, we have just created the skeleton notebook and + # execution will anyway stop afterwards. 
+ return if assume_shared_fs: if not error and handle_touch: self.dag.handle_touch(self) diff --git a/snakemake/notebook.py b/snakemake/notebook.py index fa7768db9..f946fe0d2 100644 --- a/snakemake/notebook.py +++ b/snakemake/notebook.py @@ -1,4 +1,6 @@ +from abc import abstractmethod import os, sys +from pathlib import Path from urllib.error import URLError import tempfile import re @@ -10,15 +12,17 @@ from snakemake.logging import logger from snakemake.common import is_local_file from snakemake.common import ON_WINDOWS -from snakemake.sourcecache import SourceCache +from snakemake.sourcecache import SourceCache, infer_source_file KERNEL_STARTED_RE = re.compile(r"Kernel started: (?P\S+)") KERNEL_SHUTDOWN_RE = re.compile(r"Kernel shutdown: (?P\S+)") -class Listen: - def __init__(self, arg): - self.ip, self.port = arg.split(":") +class EditMode: + def __init__(self, server_addr=None, draft_only=False): + if server_addr is not None: + self.ip, self.port = server_addr.split(":") + self.draft_only = draft_only def get_cell_sources(source): @@ -33,7 +37,7 @@ class JupyterNotebook(ScriptBase): editable = True - def draft(self, listen): + def draft(self): import nbformat preamble = self.get_preamble() @@ -41,12 +45,16 @@ def draft(self, listen): self.insert_preamble_cell(preamble, nb) nb["cells"].append(nbformat.v4.new_code_cell("# start coding here")) + nb["metadata"] = {"language_info": {"name": self.get_language_name()}} os.makedirs(os.path.dirname(self.local_path), exist_ok=True) with open(self.local_path, "wb") as out: out.write(nbformat.writes(nb).encode()) + def draft_and_edit(self, listen): + self.draft() + self.source = open(self.local_path).read() self.evaluate(edit=listen) @@ -73,6 +81,7 @@ def execute_script(self, fname, edit=None): with tempfile.TemporaryDirectory() as tmp: if edit is not None: + assert not edit.draft_only logger.info("Opening notebook for editing.") cmd = ( "jupyter notebook --browser ':' --no-browser --log-level ERROR --ip {edit.ip} --port 
{edit.port} " @@ -132,6 +141,14 @@ def remove_preamble_cell(self, notebook): # remove old preamble del notebook["cells"][preamble] + @abstractmethod + def get_language_name(self): + ... + + @abstractmethod + def get_interpreter_exec(self): + ... + class PythonJupyterNotebook(JupyterNotebook): def get_preamble(self): @@ -163,6 +180,12 @@ def get_preamble(self): preamble_addendum=preamble_addendum, ) + def get_language_name(self): + return "python" + + def get_interpreter_exec(self): + return "python" + class RJupyterNotebook(JupyterNotebook): def get_preamble(self): @@ -193,6 +216,12 @@ def get_preamble(self): preamble_addendum=preamble_addendum, ) + def get_language_name(self): + return "r" + + def get_interpreter_exec(self): + return "RScript" + def get_exec_class(language): exec_class = { @@ -266,6 +295,7 @@ def notebook( else: source = None is_local = True + path = infer_source_file(path) exec_class = get_exec_class(language) @@ -295,7 +325,24 @@ def notebook( is_local, ) - if draft: - executor.draft(listen=edit) + if edit is None: + executor.evaluate(edit=edit) + elif edit.draft_only: + executor.draft() + msg = "Generated skeleton notebook:\n{} ".format(path) + if conda_env and not container_img: + msg += ( + "\n\nEditing with VSCode:\nOpen notebook, run command 'Select notebook kernel' (Ctrl+Shift+P or Cmd+Shift+P), and choose:" + "\n{}\n".format( + str(Path(conda_env) / "bin" / executor.get_interpreter_exec()) + ) + ) + msg += ( + "\nEditing with Jupyter CLI:" + "\nconda activate {}\njupyter notebook {}\n".format(conda_env, path) + ) + logger.info(msg) + elif draft: + executor.draft_and_edit(listen=edit) else: executor.evaluate(edit=edit) diff --git a/tests/test_jupyter_notebook_draft/Snakefile b/tests/test_jupyter_notebook_draft/Snakefile new file mode 100644 index 000000000..0b7526f01 --- /dev/null +++ b/tests/test_jupyter_notebook_draft/Snakefile @@ -0,0 +1,46 @@ +shell.executable("bash") + +rule all: + input: + 'result_final.txt', + 
'book.result_final.txt', + +rule foo: + output: + fname = 'data.txt' + run: + with open(output.fname, 'w') as fd: + fd.write('result of serious computation') + +rule bar: + input: + infile = 'data.txt' + output: + outfile = 'result_intermediate.txt' + conda: + 'env.yaml' + notebook: + 'Notebook.py.ipynb' + +rule baz: + input: + infile = 'result_intermediate.txt' + output: + outfile = 'result_final.txt' + log: + notebook = 'Notebook_Processed.ipynb' + conda: + 'env.yaml' + notebook: + 'Notebook.py.ipynb' + + +rule wild: + input: + infile = 'result_intermediate.txt' + output: + outfile = '{what}.result_final.txt' + conda: + 'env.yaml' + notebook: + 'Note{wildcards.what}.py.ipynb' diff --git a/tests/test_jupyter_notebook_draft/data.txt b/tests/test_jupyter_notebook_draft/data.txt new file mode 100644 index 000000000..6b8b69cfd --- /dev/null +++ b/tests/test_jupyter_notebook_draft/data.txt @@ -0,0 +1 @@ +result of serious computation \ No newline at end of file diff --git a/tests/test_jupyter_notebook_draft/env.yaml b/tests/test_jupyter_notebook_draft/env.yaml new file mode 100644 index 000000000..0d031e45e --- /dev/null +++ b/tests/test_jupyter_notebook_draft/env.yaml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - python >=3.5 + - jupyter + - ipykernel diff --git a/tests/test_jupyter_notebook_draft/expected-results/Notebook.py.ipynb b/tests/test_jupyter_notebook_draft/expected-results/Notebook.py.ipynb new file mode 100644 index 000000000..18bf48fbb --- /dev/null +++ b/tests/test_jupyter_notebook_draft/expected-results/Notebook.py.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "f8c5fca4", + "metadata": { + "tags": [ + "snakemake-job-properties" + ] + }, + "outputs": [], + "source": [ + "\n", + "######## snakemake preamble start (automatically inserted, do not edit) ########\n", + "import sys; sys.path.extend(['/Users/johannes/scms/snakemake', 
'/Users/johannes/scms/snakemake/tests/test_jupyter_notebook_draft']); import pickle; snakemake = pickle.loads(b'\\x80\\x03csnakemake.script\\nSnakemake\\nq\\x00)\\x81q\\x01}q\\x02(X\\x05\\x00\\x00\\x00inputq\\x03csnakemake.io\\nInputFiles\\nq\\x04)\\x81q\\x05X\\x08\\x00\\x00\\x00data.txtq\\x06a}q\\x07(X\\x06\\x00\\x00\\x00_namesq\\x08}q\\tX\\x06\\x00\\x00\\x00infileq\\nK\\x00N\\x86q\\x0bsX\\x12\\x00\\x00\\x00_allowed_overridesq\\x0c]q\\r(X\\x05\\x00\\x00\\x00indexq\\x0eX\\x04\\x00\\x00\\x00sortq\\x0feh\\x0ecfunctools\\npartial\\nq\\x10cbuiltins\\ngetattr\\nq\\x11csnakemake.io\\nNamedlist\\nq\\x12X\\x0f\\x00\\x00\\x00_used_attributeq\\x13\\x86q\\x14Rq\\x15\\x85q\\x16Rq\\x17(h\\x15)}q\\x18X\\x05\\x00\\x00\\x00_nameq\\x19h\\x0esNtq\\x1abh\\x0fh\\x10h\\x15\\x85q\\x1bRq\\x1c(h\\x15)}q\\x1dh\\x19h\\x0fsNtq\\x1ebh\\nh\\x06ubX\\x06\\x00\\x00\\x00outputq\\x1fcsnakemake.io\\nOutputFiles\\nq )\\x81q!X\\x17\\x00\\x00\\x00result_intermediate.txtq\"a}q#(h\\x08}q$X\\x07\\x00\\x00\\x00outfileq%K\\x00N\\x86q&sh\\x0c]q\\'(h\\x0eh\\x0feh\\x0eh\\x10h\\x15\\x85q(Rq)(h\\x15)}q*h\\x19h\\x0esNtq+bh\\x0fh\\x10h\\x15\\x85q,Rq-(h\\x15)}q.h\\x19h\\x0fsNtq/bh%h\"ubX\\x06\\x00\\x00\\x00paramsq0csnakemake.io\\nParams\\nq1)\\x81q2}q3(h\\x08}q4h\\x0c]q5(h\\x0eh\\x0feh\\x0eh\\x10h\\x15\\x85q6Rq7(h\\x15)}q8h\\x19h\\x0esNtq9bh\\x0fh\\x10h\\x15\\x85q:Rq;(h\\x15)}qcsnakemake.io\\nWildcards\\nq?)\\x81q@}qA(h\\x08}qBh\\x0c]qC(h\\x0eh\\x0feh\\x0eh\\x10h\\x15\\x85qDRqE(h\\x15)}qFh\\x19h\\x0esNtqGbh\\x0fh\\x10h\\x15\\x85qHRqI(h\\x15)}qJh\\x19h\\x0fsNtqKbubX\\x07\\x00\\x00\\x00threadsqLK\\x01X\\t\\x00\\x00\\x00resourcesqMcsnakemake.io\\nResources\\nqN)\\x81qO(K\\x01K\\x01X0\\x00\\x00\\x00/var/folders/l0/9bhq7fc12lgfknlx5gyxckv00000gp/TqPe}qQ(h\\x08}qR(X\\x06\\x00\\x00\\x00_coresqSK\\x00N\\x86qTX\\x06\\x00\\x00\\x00_nodesqUK\\x01N\\x86qVX\\x06\\x00\\x00\\x00tmpdirqWK\\x02N\\x86qXuh\\x0c]qY(h\\x0eh\\x0feh\\x0eh\\x10h\\x15\\x85qZRq[(h\\x15)}q\\\\h\\x19h\\x0esNtq]bh\\x0fh\\x10h\\x15\\x85q^Rq_(h\\x15)}q`h\\x19h\\x
0fsNtqabhSK\\x01hUK\\x01hWhPubX\\x03\\x00\\x00\\x00logqbcsnakemake.io\\nLog\\nqc)\\x81qd}qe(h\\x08}qfh\\x0c]qg(h\\x0eh\\x0feh\\x0eh\\x10h\\x15\\x85qhRqi(h\\x15)}qjh\\x19h\\x0esNtqkbh\\x0fh\\x10h\\x15\\x85qlRqm(h\\x15)}qnh\\x19h\\x0fsNtqobubX\\x06\\x00\\x00\\x00configqp}qqX\\x04\\x00\\x00\\x00ruleqrX\\x03\\x00\\x00\\x00barqsX\\x0f\\x00\\x00\\x00bench_iterationqtNX\\t\\x00\\x00\\x00scriptdirquX@\\x00\\x00\\x00/Users/johannes/scms/snakemake/tests/test_jupyter_notebook_draftqvub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/Users/johannes/scms/snakemake/tests/test_jupyter_notebook_draft');\n", + "######## snakemake preamble end #########\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4df7dcd5", + "metadata": {}, + "outputs": [], + "source": [ + "# start coding here" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/tests/tests.py b/tests/tests.py index 78a5fcd72..abad5dc63 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1172,6 +1172,18 @@ def test_jupyter_notebook(): run(dpath("test_jupyter_notebook"), use_conda=True) +def test_jupyter_notebook_draft(): + from snakemake.notebook import EditMode + + run( + dpath("test_jupyter_notebook_draft"), + use_conda=True, + edit_notebook=EditMode(draft_only=True), + targets=["result_intermediate.txt"], + check_md5=False, + ) + + def test_github_issue456(): run(dpath("test_github_issue456"))