V2 Modernize boilerplate (#354)

* WIP * Remove setuptools, add tqdm * Lint * Don't need setuptools or wheel * Add comment fences * Can't believe E266 is a thing * Whitespace * Add linting to tests * Remove template from project linting and formatting * Reorganize boilerplate modules * Get rid of editable check * Actually run linting * Actually test linting * Update verify_files test * Update verify_folders * Remove duplicate file * Fix lint --------- Co-authored-by: Jay Qi <jayqi@users.noreply.github.com>
drivendataorg · Apr 15, 2024 · df8b5d3 · df8b5d3
1 parent 39a6cad
commit df8b5d3
Show file tree

Hide file tree

Showing 21 changed files with 230 additions and 54 deletions.
diff --git a/Makefile b/Makefile
@@ -26,13 +26,13 @@ requirements:
 
 ## Format the code using isort and black
 format:
-	isort --profile black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
-	black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
-	
+	isort --profile black ccds hooks tests docs/scripts
+	black ccds hooks tests docs/scripts
+
 lint:
-	flake8 ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
-	isort --check --profile black ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
-	black --check ccds hooks tests docs/scripts "{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}"
+	flake8 ccds hooks tests docs/scripts
+	isort --check --profile black ccds hooks tests docs/scripts
+	black --check ccds hooks tests docs/scripts
 
 
 ###     DOCS

diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py
@@ -15,7 +15,6 @@
     "isort",
     "pip",
     "python-dotenv",
-    "setuptools",
 ]
 
 # {% if cookiecutter.dataset_storage.s3 %}
@@ -26,6 +25,7 @@
 packages += [
     "typer",
     "loguru",
+    "tqdm",
 ]
 # {% endif %}
 

diff --git a/tests/test_creation.py b/tests/test_creation.py
@@ -29,6 +29,7 @@ def test_baking_configs(config, fast):
     with bake_project(config) as project_directory:
         verify_folders(project_directory, config)
         verify_files(project_directory, config)
+        lint(project_directory)
 
         if fast < 2:
             verify_makefile_commands(project_directory, config)
@@ -54,10 +55,7 @@ def verify_folders(root, config):
 
     if config["include_code_scaffold"] == "Yes":
         expected_dirs += [
-            f"{config['module_name']}/data",
-            f"{config['module_name']}/features",
-            f"{config['module_name']}/models",
-            f"{config['module_name']}/visualization",
+            f"{config['module_name']}/modeling",
         ]
 
     if config["docs"] == "mkdocs":
@@ -104,15 +102,13 @@ def verify_files(root, config):
 
     if config["include_code_scaffold"] == "Yes":
         expected_files += [
-            f"{config['module_name']}/data/__init__.py",
-            f"{config['module_name']}/data/make_dataset.py",
-            f"{config['module_name']}/features/__init__.py",
-            f"{config['module_name']}/features/build_features.py",
-            f"{config['module_name']}/models/__init__.py",
-            f"{config['module_name']}/models/train_model.py",
-            f"{config['module_name']}/models/predict_model.py",
-            f"{config['module_name']}/visualization/__init__.py",
-            f"{config['module_name']}/visualization/visualize.py",
+            f"{config['module_name']}/config.py",
+            f"{config['module_name']}/dataset.py",
+            f"{config['module_name']}/features.py",
+            f"{config['module_name']}/modeling/__init__.py",
+            f"{config['module_name']}/modeling/train.py",
+            f"{config['module_name']}/modeling/predict.py",
+            f"{config['module_name']}/plots.py",
         ]
 
     if config["docs"] == "mkdocs":
@@ -183,3 +179,30 @@ def verify_makefile_commands(root, config):
     assert "clean                    Delete all compiled Python files" in stdout_output
 
     assert result_returncode == 0
+
+
+def lint(root):
+    """Run the linters on the project."""
+    result = run(
+        ["make", "lint"],
+        cwd=root,
+        stderr=PIPE,
+        stdout=PIPE,
+    )
+    result_returncode = result.returncode
+
+    encoding = sys.stdout.encoding
+
+    if encoding is None:
+        encoding = "utf-8"
+
+    # normally hidden by pytest except in failure we want this displayed
+    print("PATH=", os.getenv("PATH"))
+    print("\n======================= STDOUT ======================")
+    stdout_output = result.stdout.decode(encoding)
+    print(stdout_output)
+
+    print("\n======================= STDERR ======================")
+    print(result.stderr.decode(encoding))
+
+    assert result_returncode == 0
diff --git a/{{ cookiecutter.repo_name }}/Makefile b/{{ cookiecutter.repo_name }}/Makefile
@@ -15,7 +15,7 @@ PYTHON_INTERPRETER = python
 .PHONY: requirements
 requirements:
 	{% if "requirements.txt" == cookiecutter.dependency_file -%}
-	$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
+	$(PYTHON_INTERPRETER) -m pip install -U pip
 	$(PYTHON_INTERPRETER) -m pip install -r requirements.txt
 	{% elif "environment.yml" == cookiecutter.dependency_file -%}
 	conda env update --name $(PROJECT_NAME) --file environment.yml --prune
@@ -35,9 +35,9 @@ clean:
 .PHONY: lint
 lint:
 	flake8 {{ cookiecutter.module_name }}
+	isort --check --diff --profile black {{ cookiecutter.module_name }}
 	black --check --config pyproject.toml {{ cookiecutter.module_name }}
 
-
 ## Format source code with black
 .PHONY: format
 format:
@@ -119,4 +119,4 @@ endef
 export PRINT_HELP_PYSCRIPT
 
 help:
-	@python -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
+# Use the interpreter configured in PYTHON_INTERPRETER instead of a hard-coded `python`
+	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
diff --git a/{{ cookiecutter.repo_name }}/pyproject.toml b/{{ cookiecutter.repo_name }}/pyproject.toml
@@ -26,3 +26,7 @@ exclude = '''
   | \.venv
 )/
 '''
+
+[tool.ruff.lint.isort]
+known_first_party = ["{{ cookiecutter.module_name }}"]
+force_sort_within_sections = true
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py
@@ -0,0 +1 @@
+import config  # noqa: F401
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py
@@ -0,0 +1,32 @@
+from pathlib import Path
+
+from dotenv import load_dotenv
+from loguru import logger
+
+# Load environment variables from .env file if it exists
+load_dotenv()
+
+# Paths
+PROJ_ROOT = Path(__file__).resolve().parents[1]
+logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
+
+DATA_DIR = PROJ_ROOT / "data"
+RAW_DATA_DIR = DATA_DIR / "raw"
+INTERIM_DATA_DIR = DATA_DIR / "interim"
+PROCESSED_DATA_DIR = DATA_DIR / "processed"
+EXTERNAL_DATA_DIR = DATA_DIR / "external"
+
+MODELS_DIR = PROJ_ROOT / "models"
+
+REPORTS_DIR = PROJ_ROOT / "reports"
+FIGURES_DIR = REPORTS_DIR / "figures"
+
+# If tqdm is installed, configure loguru with tqdm.write
+# https://github.com/Delgan/loguru/issues/135
+try:
+    from tqdm import tqdm
+
+    logger.remove(0)
+    logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+except ModuleNotFoundError:
+    pass
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/data/make_dataset.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
+
+app = typer.Typer()
+
+
+# Typer command for this module's CLI. The body is placeholder scaffolding: it
+# logs progress while iterating a 10-step tqdm loop and does not yet read
+# input_path or write output_path.
+@app.command()
+def main(
+    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+    input_path: Path = RAW_DATA_DIR / "dataset.csv",
+    output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
+    # ----------------------------------------------
+):
+    # ---- REPLACE THIS WITH YOUR OWN CODE ----
+    logger.info("Processing dataset...")
+    for i in tqdm(range(10), total=10):
+        if i == 5:
+            logger.info("Something happened for iteration 5.")
+    logger.success("Processing dataset complete.")
+    # -----------------------------------------
+
+
+if __name__ == "__main__":
+    app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+# Typer command for this module's CLI. The body is placeholder scaffolding: it
+# logs progress while iterating a 10-step tqdm loop and does not yet read
+# input_path or write output_path.
+@app.command()
+def main(
+    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
+    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
+    # ----------------------------------------------
+):
+    # ---- REPLACE THIS WITH YOUR OWN CODE ----
+    logger.info("Generating features from dataset...")
+    for i in tqdm(range(10), total=10):
+        if i == 5:
+            logger.info("Something happened for iteration 5.")
+    logger.success("Features generation complete.")
+    # -----------------------------------------
+
+
+if __name__ == "__main__":
+    app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/__init__.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/build_features.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features/build_features.py
diff --git a/...kiecutter.module_name }}/data/__init__.py → ...utter.module_name }}/modeling/__init__.py b/...kiecutter.module_name }}/data/__init__.py → ...utter.module_name }}/modeling/__init__.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+# Typer command for this module's CLI. The body is placeholder scaffolding: it
+# logs progress while iterating a 10-step tqdm loop and does not yet use
+# features_path, model_path or predictions_path.
+@app.command()
+def main(
+    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+    features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
+    model_path: Path = MODELS_DIR / "model.pkl",
+    predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
+    # ----------------------------------------------
+):
+    # ---- REPLACE THIS WITH YOUR OWN CODE ----
+    logger.info("Performing inference for model...")
+    for i in tqdm(range(10), total=10):
+        if i == 5:
+            logger.info("Something happened for iteration 5.")
+    logger.success("Inference complete.")
+    # -----------------------------------------
+
+
+if __name__ == "__main__":
+    app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+# Typer command for this module's CLI. The body is placeholder scaffolding: it
+# logs progress while iterating a 10-step tqdm loop and does not yet use
+# features_path, labels_path or model_path.
+@app.command()
+def main(
+    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+    features_path: Path = PROCESSED_DATA_DIR / "features.csv",
+    labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
+    model_path: Path = MODELS_DIR / "model.pkl",
+    # ----------------------------------------------
+):
+    # ---- REPLACE THIS WITH YOUR OWN CODE ----
+    logger.info("Training some model...")
+    for i in tqdm(range(10), total=10):
+        if i == 5:
+            logger.info("Something happened for iteration 5.")
+    logger.success("Modeling training complete.")
+    # -----------------------------------------
+
+
+if __name__ == "__main__":
+    app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/__init__.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/predict_model.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/predict_model.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/train_model.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/models/train_model.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import FIGURES_DIR, PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+# Typer command for this module's CLI. The body is placeholder scaffolding: it
+# logs progress while iterating a 10-step tqdm loop and does not yet read
+# input_path or write output_path.
+@app.command()
+def main(
+    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
+    output_path: Path = FIGURES_DIR / "plot.png",
+    # ----------------------------------------------
+):
+    # ---- REPLACE THIS WITH YOUR OWN CODE ----
+    logger.info("Generating plot from data...")
+    for i in tqdm(range(10), total=10):
+        if i == 5:
+            logger.info("Something happened for iteration 5.")
+    logger.success("Plot generation complete.")
+    # -----------------------------------------
+
+
+if __name__ == "__main__":
+    app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/__init__.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/visualize.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/visualization/visualize.py