Skip to content

Commit

Permalink
V2 Modernize boilerplate (#354)
Browse files Browse the repository at this point in the history
* WIP

* Remove setuptools, add tqdm

* Lint

* Don't need setuptools or wheel

* Add comment fences

* Can't believe E266 is a thing

* Whitespace

* Add linting to tests

* Remove template from project linting and formatting

* Reorganize boilerplate modules

* Get rid of editable check

* Actually run linting

* Actually test linting

* Update verify_files test

* Update verify_folders

* Remove duplicate file

* Fix lint

---------

Co-authored-by: Jay Qi <jayqi@users.noreply.github.com>
  • Loading branch information
jayqi and jayqi committed Apr 15, 2024
1 parent 39a6cad commit df8b5d3
Show file tree
Hide file tree
Showing 21 changed files with 230 additions and 54 deletions.
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ requirements:

## Format the code using isort and black
format:
isort --profile black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
isort --profile black ccds hooks tests docs/scripts
black ccds hooks tests docs/scripts

lint:
flake8 ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
isort --check --profile black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
black --check ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
flake8 ccds hooks tests docs/scripts
isort --check --profile black ccds hooks tests docs/scripts
black --check ccds hooks tests docs/scripts


### DOCS
Expand Down
2 changes: 1 addition & 1 deletion hooks/post_gen_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
"isort",
"pip",
"python-dotenv",
"setuptools",
]

# {% if cookiecutter.dataset_storage.s3 %}
Expand All @@ -26,6 +25,7 @@
packages += [
"typer",
"loguru",
"tqdm",
]
# {% endif %}

Expand Down
49 changes: 36 additions & 13 deletions tests/test_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def test_baking_configs(config, fast):
with bake_project(config) as project_directory:
verify_folders(project_directory, config)
verify_files(project_directory, config)
lint(project_directory)

if fast < 2:
verify_makefile_commands(project_directory, config)
Expand All @@ -54,10 +55,7 @@ def verify_folders(root, config):

if config["include_code_scaffold"] == "Yes":
expected_dirs += [
f"{config['module_name']}/data",
f"{config['module_name']}/features",
f"{config['module_name']}/models",
f"{config['module_name']}/visualization",
f"{config['module_name']}/modeling",
]

if config["docs"] == "mkdocs":
Expand Down Expand Up @@ -104,15 +102,13 @@ def verify_files(root, config):

if config["include_code_scaffold"] == "Yes":
expected_files += [
f"{config['module_name']}/data/__init__.py",
f"{config['module_name']}/data/make_dataset.py",
f"{config['module_name']}/features/__init__.py",
f"{config['module_name']}/features/build_features.py",
f"{config['module_name']}/models/__init__.py",
f"{config['module_name']}/models/train_model.py",
f"{config['module_name']}/models/predict_model.py",
f"{config['module_name']}/visualization/__init__.py",
f"{config['module_name']}/visualization/visualize.py",
f"{config['module_name']}/config.py",
f"{config['module_name']}/dataset.py",
f"{config['module_name']}/features.py",
f"{config['module_name']}/modeling/__init__.py",
f"{config['module_name']}/modeling/train.py",
f"{config['module_name']}/modeling/predict.py",
f"{config['module_name']}/plots.py",
]

if config["docs"] == "mkdocs":
Expand Down Expand Up @@ -183,3 +179,30 @@ def verify_makefile_commands(root, config):
assert "clean Delete all compiled Python files" in stdout_output

assert result_returncode == 0


def lint(root):
    """Run ``make lint`` in the baked project and require a zero exit status.

    Args:
        root: Directory used as the working directory for ``make``.

    Raises:
        AssertionError: If ``make lint`` exits with a non-zero return code.
    """
    completed = run(["make", "lint"], cwd=root, stdout=PIPE, stderr=PIPE)

    # sys.stdout may not declare an encoding; fall back to UTF-8 in that case.
    codec = "utf-8" if sys.stdout.encoding is None else sys.stdout.encoding

    # pytest hides this output unless the test fails, which is exactly when
    # the PATH and the captured linter output are needed for debugging.
    print("PATH=", os.getenv("PATH"))

    print("\n======================= STDOUT ======================")
    print(completed.stdout.decode(codec))

    print("\n======================= STDERR ======================")
    print(completed.stderr.decode(codec))

    assert completed.returncode == 0
6 changes: 3 additions & 3 deletions {{ cookiecutter.repo_name }}/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ PYTHON_INTERPRETER = python
.PHONY: requirements
requirements:
{% if "requirements.txt" == cookiecutter.dependency_file -%}
$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
$(PYTHON_INTERPRETER) -m pip install -U pip
$(PYTHON_INTERPRETER) -m pip install -r requirements.txt
{% elif "environment.yml" == cookiecutter.dependency_file -%}
conda env update --name $(PROJECT_NAME) --file environment.yml --prune
Expand All @@ -35,9 +35,9 @@ clean:
.PHONY: lint
lint:
flake8 {{ cookiecutter.module_name }}
isort --check --diff --profile black {{ cookiecutter.module_name }}
black --check --config pyproject.toml {{ cookiecutter.module_name }}


## Format source code with black
.PHONY: format
format:
Expand Down Expand Up @@ -119,4 +119,4 @@ endef
export PRINT_HELP_PYSCRIPT

help:
@python -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
@python -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
4 changes: 4 additions & 0 deletions {{ cookiecutter.repo_name }}/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,7 @@ exclude = '''
| \.venv
)/
'''

[tool.ruff.lint.isort]
known_first_party = ["{{ cookiecutter.module_name }}"]
force_sort_within_sections = true
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Import the sibling ``config`` module when the package is imported so its
# import-time setup runs. A relative import is required: a bare
# ``import config`` is an absolute import in Python 3 and only resolves if an
# unrelated top-level ``config`` module happens to be on ``sys.path``.
from . import config  # noqa: F401
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pathlib import Path

from dotenv import load_dotenv
from loguru import logger

# Load environment variables from .env file if it exists
load_dotenv()

# Paths
# PROJ_ROOT is the directory that contains this file's parent directory.
PROJ_ROOT = Path(__file__).resolve().parents[1]
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")

DATA_DIR = PROJ_ROOT / "data"
RAW_DATA_DIR = DATA_DIR / "raw"
INTERIM_DATA_DIR = DATA_DIR / "interim"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
EXTERNAL_DATA_DIR = DATA_DIR / "external"

MODELS_DIR = PROJ_ROOT / "models"

REPORTS_DIR = PROJ_ROOT / "reports"
FIGURES_DIR = REPORTS_DIR / "figures"

# If tqdm is installed, configure loguru with tqdm.write
# https://github.com/Delgan/loguru/issues/135
try:
    from tqdm import tqdm
except ModuleNotFoundError:
    # tqdm is optional; keep loguru's existing configuration.
    pass
else:
    try:
        # Handler id 0 is the handler loguru installs by default.
        logger.remove(0)
    except ValueError:
        # Handler 0 is already gone (this module was re-executed, or logging
        # was reconfigured elsewhere). Leave the current sinks untouched
        # instead of failing the import or adding a duplicate tqdm sink.
        pass
    else:
        # Route log messages through tqdm.write so they do not break
        # progress bars that are being rendered.
        logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pathlib import Path

import typer
from loguru import logger
from tqdm import tqdm

from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR, RAW_DATA_DIR

app = typer.Typer()


# Typer command for the dataset step. The body is placeholder scaffold: it
# logs a start message, runs a ten-step progress bar, and logs completion.
# Neither path argument is read by the placeholder body.
@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = RAW_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    # ----------------------------------------------
):
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Processing dataset...")
    for step in tqdm(range(10), total=10):
        if step != 5:
            continue
        # Demonstrates logging from inside an active progress bar.
        logger.info("Something happened for iteration 5.")
    logger.success("Processing dataset complete.")
    # -----------------------------------------


if __name__ == "__main__":
app()
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pathlib import Path

import typer
from loguru import logger
from tqdm import tqdm

from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR

app = typer.Typer()


# Typer command for the feature-generation step. The body is placeholder
# scaffold: it logs a start message, runs a ten-step progress bar, and logs
# completion. Neither path argument is read by the placeholder body.
@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
    # -----------------------------------------
):
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating features from dataset...")
    for step in tqdm(range(10), total=10):
        if step != 5:
            continue
        # Demonstrates logging from inside an active progress bar.
        logger.info("Something happened for iteration 5.")
    logger.success("Features generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
app()
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from pathlib import Path

import typer
from loguru import logger
from tqdm import tqdm

from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


# Typer command for the inference step. The body is placeholder scaffold: it
# logs a start message, runs a ten-step progress bar, and logs completion.
# None of the path arguments is read by the placeholder body.
@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
    # -----------------------------------------
):
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Performing inference for model...")
    for step in tqdm(range(10), total=10):
        if step != 5:
            continue
        # Demonstrates logging from inside an active progress bar.
        logger.info("Something happened for iteration 5.")
    logger.success("Inference complete.")
    # -----------------------------------------


if __name__ == "__main__":
app()
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from pathlib import Path

import typer
from loguru import logger
from tqdm import tqdm

from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


# Typer command for the training step. The body is placeholder scaffold: it
# logs a start message, runs a ten-step progress bar, and logs completion.
# None of the path arguments is read by the placeholder body.
@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "features.csv",
    labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    # -----------------------------------------
):
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Training some model...")
    for step in tqdm(range(10), total=10):
        if step != 5:
            continue
        # Demonstrates logging from inside an active progress bar.
        logger.info("Something happened for iteration 5.")
    logger.success("Modeling training complete.")
    # -----------------------------------------


if __name__ == "__main__":
app()
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pathlib import Path

import typer
from loguru import logger
from tqdm import tqdm

from {{ cookiecutter.module_name }}.config import FIGURES_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


# Typer command for the plotting step. The body is placeholder scaffold: it
# logs a start message, runs a ten-step progress bar, and logs completion.
# Neither path argument is read by the placeholder body.
@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = FIGURES_DIR / "plot.png",
    # -----------------------------------------
):
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating plot from data...")
    for step in tqdm(range(10), total=10):
        if step != 5:
            continue
        # Demonstrates logging from inside an active progress bar.
        logger.info("Something happened for iteration 5.")
    logger.success("Plot generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
app()
Empty file.
Empty file.

0 comments on commit df8b5d3

Please sign in to comment.