Skip to content

Commit

Permalink
refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
choldgraf committed Mar 12, 2020
1 parent efe3b1a commit a28af99
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 103 deletions.
2 changes: 1 addition & 1 deletion nbclean/__init__.py
Expand Up @@ -3,4 +3,4 @@
__version__ = "0.3.2"

from .clean import NotebookCleaner
from .run import run_notebook_directory, run_notebook
from .run import run_notebooks, run_notebook
12 changes: 11 additions & 1 deletion nbclean/clean.py
Expand Up @@ -4,7 +4,7 @@
from nbgrader.preprocessors import ClearSolutions
from glob import glob
from .preprocessors import RemoveCells, ClearCells, ConvertCells
from .utils import _check_nb_file
from .utils import _check_nb_file, _find_notebooks


class NotebookCleaner(object):
Expand Down Expand Up @@ -167,3 +167,13 @@ def save(self, path_save):
os.makedirs(dir_save)
nbf.write(self.ntbk, path_save)


def clear_notebooks(path, kind, skip=".ipynb_checkpoints", **kwargs):
    """Clear ``kind`` from every notebook found at ``path``, saving in place.

    Parameters
    ----------
    path : str
        Location handed to ``_find_notebooks`` to discover notebooks.
    kind : str
        What to clear; forwarded to ``NotebookCleaner.clear``.
    skip : str
        Forwarded to ``_find_notebooks`` as its ``skip`` argument.
    **kwargs
        Extra keyword arguments forwarded to ``NotebookCleaner.clear``.
    """
    targets = _find_notebooks(path, skip)
    if not targets:
        print("Note: no notebooks were found to be cleared.")
    for target in map(str, targets):
        print(f"Clearing {kind} in {target}")
        cleaner = NotebookCleaner(target)
        cleaner.clear(kind, **kwargs)
        # Each notebook is written back over the file it was read from.
        cleaner.save(target)
41 changes: 2 additions & 39 deletions nbclean/cli.py
@@ -1,9 +1,8 @@
import click
import nbformat as nbf
from glob import glob
from nbclient import execute
from .clean import NotebookCleaner
from .run import run_notebook
from .clean import clear_notebooks
from .run import run_notebooks

@click.group()
def main():
Expand All @@ -17,44 +16,8 @@ def main():
def clear(path, kind, skip, **kwargs):
clear_notebooks(path, kind, skip=skip, **kwargs)

def _find_notebooks(path, skip):
from pathlib import Path
path = Path(path)
if not path.exists():
raise ValueError(f"You gave a path that doesn't exist: {path}")
elif path.is_dir():
notebooks = list(path.rglob("*.ipynb"))
notebooks = [ii for ii in notebooks if skip not in str(ii)]
elif path.suffix == ".ipynb":
notebooks = [str(path)]
else:
raise ValueError(f"You gave a path that isn't a folder or a notebook file: {path}")
return notebooks

def clear_notebooks(path, kind, skip=".ipynb_checkpoints", **kwargs):
    """Clear ``kind`` from every notebook found at ``path``, saving in place.

    Parameters
    ----------
    path : str
        Location handed to ``_find_notebooks`` to discover notebooks.
    kind : str
        What to clear; forwarded to ``NotebookCleaner.clear``.
    skip : str
        Forwarded to ``_find_notebooks`` as its ``skip`` argument.
    **kwargs
        Extra keyword arguments forwarded to ``NotebookCleaner.clear``.
    """
    found = _find_notebooks(path, skip)
    if not found:
        print("Note: no notebooks were found to be cleared.")
    for nb_file in (str(entry) for entry in found):
        print(f"Clearing {kind} in {nb_file}")
        cleaner = NotebookCleaner(nb_file)
        cleaner.clear(kind, **kwargs)
        # The cleaned notebook overwrites the file it was loaded from.
        cleaner.save(nb_file)


@main.command()
@click.argument("path")
@click.option("--skip", default=".ipynb_checkpoints", help="Skip notebooks whose path contains this text")
def run(path, skip, **kwargs):
    """Run the notebook(s) found at PATH."""
    # The previous --skip help text was copied from the `clear` command and
    # described clearing; `skip` is only ever used to filter notebook paths.
    run_notebooks(path, skip, **kwargs)

def run_notebooks(path, skip, **kwargs):
    """Execute every notebook found at ``path`` and overwrite it with the result.

    Parameters
    ----------
    path : str
        Location handed to ``_find_notebooks`` to discover notebooks.
    skip : str
        Forwarded to ``_find_notebooks`` as its ``skip`` argument.
    **kwargs
        Accepted for call compatibility; not used by this function.
    """
    from pathlib import Path

    notebooks = _find_notebooks(path, skip)
    if not notebooks:
        print("Note: no notebooks were found to be executed.")

    for nb_path in notebooks:
        # _find_notebooks yields a plain str when ``path`` is a single file;
        # a str has no ``.parent``, so normalise to Path before deriving the
        # working directory.
        nb_path = Path(nb_path)
        print(f"Running {nb_path}")
        ntbk = nbf.read(str(nb_path), nbf.NO_CONVERT)
        # Execute with the notebook's own folder as the working directory.
        ntbk = execute(ntbk, cwd=str(nb_path.parent))
        nbf.write(ntbk, str(nb_path))
79 changes: 18 additions & 61 deletions nbclean/run.py
Expand Up @@ -2,65 +2,22 @@
import os
import os.path as op
from nbgrader.preprocessors import LimitOutput, Execute
from .utils import _check_nb_file
from .utils import _check_nb_file, _find_notebooks
from glob import glob
from pathlib import Path
from tqdm import tqdm
from nbclient import execute


def run_notebook_directory(path, path_save=None, max_output_lines=1000,
overwrite=False):
"""Run all the notebooks in a directory and save them somewhere else.
def run_notebooks(path, skip, **kwargs):
notebooks = _find_notebooks(path, skip)
if len(notebooks) == 0:
print("Note: no notebooks were found to be executed.")

Parameters
----------
path : str
A path to a directory that contains jupyter notebooks.
All notebooks in this folder ending in `.ipynb` will be run,
and the outputs will be placed in `path_save`. This may
optionally contain a wildcard matching ``<something>.ipynb`` in which
case only notebooks that match will be run.
path_save : str | None
A path to a directory to save the notebooks. If this doesn't exist,
it will be created. If `None`, notebooks will not be saved.
max_output_lines : int | None
The maximum number of lines allowed in notebook outputs.
overwrite : bool
Whether to overwrite the output directory if it exists.
Returns
-------
notebooks : list
A list of the `NotebookNode` instances, one for each notebook.
"""
if not op.exists(path):
raise ValueError("You've specified an input path that doesn't exist")
to_glob = op.join(path, '*.ipynb') if '.ipynb' not in path else path
notebooks = glob(to_glob)

# Execute notebooks
outputs = []
for notebook in tqdm(notebooks):
outputs.append(run_notebook(notebook,
max_output_lines=max_output_lines))

# Now save them
if path_save is not None:
print('Saving {} notebooks to: {}'.format(len(notebooks), path_save))
if not op.exists(path_save):
os.makedirs(path_save)
elif overwrite is True:
print('Overwriting output directory')
for ifile in glob(path_save + '*-exe.ipynb'):
os.remove(ifile)
else:
raise ValueError('path_save exists and overwrite is not True')

for filename, notebook in zip(notebooks, outputs):
this_name = op.basename(filename)
left, right = this_name.split('.')
left += '-exe'
this_name = '.'.join([left, right])
nbf.write(notebook, op.join(path_save, this_name))
for path in notebooks:
print(f"Running {path}")
ntbk = run_notebook(path)
nbf.write(ntbk, str(path))


def run_notebook(ntbk, max_output_lines=1000):
Expand All @@ -73,12 +30,12 @@ def run_notebook(ntbk, max_output_lines=1000):
max_output_lines : int | None
The maximum number of lines allowed in notebook outputs.
"""
if isinstance(ntbk, (str, type(Path))):
ntbk = Path(ntbk)
wd = str(ntbk.parent)
ntbk = str(ntbk)
else:
wd = str(Path())
ntbk = _check_nb_file(ntbk)

preprocessors = [Execute()]
if max_output_lines is not None:
preprocessors.append(LimitOutput(max_lines=max_output_lines,
max_traceback=max_output_lines))
for prep in preprocessors:
ntbk, _ = prep.preprocess(ntbk, {})
ntbk = execute(ntbk, cwd=wd)
return ntbk
20 changes: 19 additions & 1 deletion nbclean/utils.py
@@ -1,12 +1,30 @@
import nbformat as nbf
from nbformat.notebooknode import NotebookNode
from copy import deepcopy
from pathlib import Path


def _check_nb_file(ntbk):
    """Return a notebook object for ``ntbk``, loading it from disk if needed.

    Parameters
    ----------
    ntbk : str | Path | NotebookNode
        A path to a notebook file, or an already-loaded notebook.

    Returns
    -------
    ntbk : NotebookNode
        A deep copy, so later changes do not touch the object passed in.

    Raises
    ------
    TypeError
        If ``ntbk`` is not a string, ``Path`` or ``NotebookNode``.
    """
    if isinstance(ntbk, Path):
        ntbk = str(ntbk)
    if isinstance(ntbk, str):
        ntbk = nbf.read(ntbk, nbf.NO_CONVERT)
    elif not isinstance(ntbk, NotebookNode):
        # Only one raise is kept: a second raise after the first could never
        # run, and this message also reports the offending type.
        raise TypeError(f'`ntbk` must be type string or `NotebookNode`, found: {type(ntbk)}')
    ntbk = deepcopy(ntbk)
    return ntbk


def _find_notebooks(path, skip):
from pathlib import Path
path = Path(path)
if not path.exists():
raise ValueError(f"You gave a path that doesn't exist: {path}")
elif path.is_dir():
notebooks = list(path.rglob("*.ipynb"))
notebooks = [ii for ii in notebooks if skip not in str(ii)]
elif path.suffix == ".ipynb":
notebooks = [str(path)]
else:
raise ValueError(f"You gave a path that isn't a folder or a notebook file: {path}")
return notebooks

0 comments on commit a28af99

Please sign in to comment.