
Commit

Merge pull request #190 from nasa/feature/issue-134-expand-tests-with-output-from-subsetter

Feature/issue 134 expand tests with output from subsetter
danielfromearth committed May 15, 2024
2 parents fc254d0 + 2326fb3 commit 6e2f8f3
Showing 19 changed files with 389 additions and 184 deletions.
5 changes: 5 additions & 0 deletions .codecov.yml
@@ -0,0 +1,5 @@
flags:
unittests:
carryforward: false
integration:
carryforward: false
64 changes: 64 additions & 0 deletions .github/workflows/integration-test.yml
@@ -0,0 +1,64 @@
name: Integration Tests

on:
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
secrets:
DEK_EDL_USER:
required: true
DEK_EDL_PASSWORD:
required: true
codecov_token:
required: true
push:
branches:
- main
- develop
- release/**
- feature/**

# When this workflow is queued, automatically cancel any previous running
# or pending jobs from the same branch
concurrency:
group: integration-tests-${{ github.ref }}
cancel-in-progress: true

env:
POETRY_VERSION: "1.3.2"
PYTHON_VERSION: "3.10"

jobs:
integration-tests:
runs-on: ubuntu-latest

steps:
- name: Retrieve repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}

- name: Set up Poetry
uses: abatilo/actions-poetry@v3.0.0
with:
poetry-version: ${{ env.POETRY_VERSION }}

- name: Install package
run: poetry install --with=integration --without harmony

- name: Test
env:
EDL_USER: ${{ secrets.DEK_EDL_USER }}
EDL_PASSWORD: ${{ secrets.DEK_EDL_PASSWORD }}
run: |
scripts/create-netrc
poetry run pytest --cov=concatenator --cov-report=xml tests/integration
- name: Upload coverage
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
flags: integration
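
The Test step in this workflow calls a repository helper, scripts/create-netrc, to turn the DEK_EDL_USER / DEK_EDL_PASSWORD secrets (exposed to the step as EDL_USER and EDL_PASSWORD) into a ~/.netrc entry that Earthdata Login clients can read. The helper itself is not part of this diff; the snippet below is a minimal Python sketch of what such a script typically does, with the urs.earthdata.nasa.gov hostname assumed rather than confirmed.

# Hypothetical sketch of scripts/create-netrc (the real script is not shown in this diff):
# write Earthdata Login credentials from the environment into ~/.netrc so the
# integration tests can authenticate.
import os
import stat
from pathlib import Path

netrc_path = Path.home() / ".netrc"
netrc_path.write_text(
    "machine urs.earthdata.nasa.gov\n"
    f"login {os.environ['EDL_USER']}\n"
    f"password {os.environ['EDL_PASSWORD']}\n"
)
# Most netrc readers refuse credential files that other users can read.
netrc_path.chmod(stat.S_IRUSR | stat.S_IWUSR)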
11 changes: 6 additions & 5 deletions .github/workflows/run_tests.yml
@@ -1,9 +1,10 @@
# A reusable workflow to build and run the unit test suite
#
# This workflow will install Python dependencies, run tests,
# and report test results and code coverage as artifacts. It will
# be called by the workflow that run tests against new PRs and as
# a first step in the workflow that publishes new Docker images.

name: A reusable workflow to build and run the unit test suite
name: Unit Tests

on:
workflow_call:
@@ -17,7 +18,7 @@ env:
PYTHON_VERSION: "3.10"

jobs:
build_and_test:
unit-tests:
runs-on: ubuntu-latest

steps:
@@ -42,11 +43,11 @@ jobs:
poetry run ruff check concatenator
- name: Run tests and collect coverage
run: poetry run pytest --cov=concatenator tests/unit/test_dataset_and_group_handling.py --cov-report=xml
# TODO: expand tests to include full concatenation runs, i.e., not only test_dataset_and_group_handling.py
run: poetry run pytest --cov=concatenator --cov-report=xml tests/unit

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
flags: unittests
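
For local debugging, the revised unit-test command (now covering all of tests/unit rather than a single module) can be reproduced through pytest's Python API. This is a sketch that assumes the project's test dependencies (pytest, pytest-cov) are installed, e.g. via poetry install.

# Run the same unit-test command the workflow now uses, from Python instead of the shell.
import pytest

exit_code = pytest.main(["--cov=concatenator", "--cov-report=xml", "tests/unit"])
raise SystemExit(exit_code)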
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [Issue #133](https://github.com/nasa/stitchee/issues/133): Add readthedocs documentation build
- [Issue #185](https://github.com/nasa/stitchee/issues/185): Added arguments for temporary file copies and overwriting output file in main stitchee function
- [Issue #181](https://github.com/nasa/stitchee/issues/181): Add a group delimiter argument
- [Issue #134](https://github.com/nasa/stitchee/issues/134): Add an integration test that runs stitchee on files first subsetted by the operational Harmony subsetter
### Changed
### Deprecated
### Removed
109 changes: 0 additions & 109 deletions concatenator/attribute_handling.py
@@ -11,7 +11,6 @@

import importlib_metadata
import netCDF4
import xarray as xr

import concatenator

@@ -96,114 +95,6 @@ def _flatten_coordinate_attribute(attribute_string: str) -> str:
)


def create_new_attributes(input_dataset: xr.Dataset, request_parameters: dict) -> dict:
"""Set the global attributes of the merged output file.
These begin as the global attributes of the input granule, but are updated to also include
the provenance data via an updated `history` CF attribute (or `History`
if that is already present), and a `history_json` attribute that is
compliant with the schema defined at the URL specified by
`HISTORY_JSON_SCHEMA`.
`projection` is not included in the output parameters, as this is not
an original message parameter. It is a derived `pyproj.Proj` instance
that is defined by the input `crs` parameter.
`x_extent` and `y_extent` are not serializable, and are instead
included by `x_min`, `x_max` and `y_min` `y_max` accordingly.
Parameters
----------
input_dataset : Dataset
request_parameters : dict
"""
# Get attributes from input file
output_attributes = input_dataset.attrs

# Reconstruct parameters' dictionary with only keys that correspond to non-null values.
valid_request_parameters = {
parameter_name: parameter_value
for parameter_name, parameter_value in request_parameters.items()
if parameter_value is not None
}

# Remove unnecessary and unserializable request parameters
for surplus_key in ["projection", "x_extent", "y_extent"]:
valid_request_parameters.pop(surplus_key, None)

# Retrieve `granule_url` and replace the `input_file` attribute.
# This ensures `history_json` refers to the archived granule location, rather
# than a temporary file in the Docker container.
valid_request_parameters["input_file"] = valid_request_parameters.pop("granule_url", None)

# Preferentially use `history`, unless `History` is already present in the
# input file.
cf_att_name = "History" if hasattr(input_dataset, "History") else "history"
input_history = getattr(input_dataset, cf_att_name, None)

# Create new history_json attribute
new_history_json_record = create_history_record(str(input_history), valid_request_parameters)

# Extract existing `history_json` from input granule
if hasattr(input_dataset, "history_json"):
old_history_json = json.loads(output_attributes["history_json"])
if isinstance(old_history_json, list):
output_history_json = old_history_json
else:
# Single `history_record` element.
output_history_json = [old_history_json]
else:
output_history_json = []

# Append `history_record` to the existing `history_json` array:
output_history_json.append(new_history_json_record)
output_attributes["history_json"] = json.dumps(output_history_json)

# Create history attribute
history_parameters = {
parameter_name: parameter_value
for parameter_name, parameter_value in new_history_json_record["parameters"].items()
if parameter_name != "input_file"
}

new_history_line = " ".join(
[
new_history_json_record["date_time"],
new_history_json_record["program"],
new_history_json_record["version"],
json.dumps(history_parameters),
]
)

output_history = "\n".join(filter(None, [input_history, new_history_line]))
output_attributes[cf_att_name] = output_history

return output_attributes


def create_history_record(input_history: str, request_parameters: dict) -> dict:
"""Create a serializable dictionary for the `history_json` global
attribute in the merged output NetCDF-4 file.
"""
history_record = {
"$schema": HISTORY_JSON_SCHEMA,
"date_time": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
"program": PROGRAM,
"version": VERSION,
"parameters": request_parameters,
"derived_from": request_parameters["input_file"],
"program_ref": PROGRAM_REF,
}

if isinstance(input_history, str):
history_record["cf_history"] = input_history.split("\n")
elif isinstance(input_history, list):
history_record["cf_history"] = input_history

return history_record


def retrieve_history(dataset: netCDF4.Dataset) -> dict:
"""
Retrieve history_json field from NetCDF dataset, if it exists
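
retrieve_history is retained by this cleanup (only create_new_attributes and create_history_record are removed). As a usage illustration, it takes an open netCDF4.Dataset and returns the parsed history_json contents when present; the file name below is a placeholder, not something from this commit.

# Hypothetical usage of the retained retrieve_history helper.
import netCDF4
from concatenator.attribute_handling import retrieve_history

with netCDF4.Dataset("merged_output.nc", mode="r") as dataset:
    history = retrieve_history(dataset)
    print(history)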
22 changes: 14 additions & 8 deletions concatenator/run_stitchee.py
@@ -1,5 +1,6 @@
"""A simple CLI wrapper around the main concatenation process."""

import argparse
import json
import logging
import sys
@@ -12,13 +13,12 @@
from concatenator.stitchee import stitchee


def parse_args(args: list) -> tuple[list[str], str, str, bool, str, dict, bool, str]:
"""
Parse args for this script.
def parse_args(args: list) -> argparse.Namespace:
"""Parse args for this script.
Returns
-------
tuple
argparse.Namespace
"""
parser = ArgumentParser(
prog="stitchee", description="Run the along-existing-dimension concatenator."
@@ -98,6 +98,13 @@ def parse_args(args: list) -> tuple[list[str], str, str, bool, str, dict, bool,

parsed = parser.parse_args(args)

return parsed


def validate_parsed_args(
parsed: argparse.Namespace,
) -> tuple[list[str], str, str, bool, str, dict, bool, str]:
"""Perform preliminary validation of the parsed arguments and return them as a tuple."""
if parsed.verbose:
logging.basicConfig(level=logging.DEBUG)

@@ -107,6 +114,7 @@ def parse_args(args: list) -> tuple[list[str], str, str, bool, str, dict, bool,

print(f"CONCAT METHOD === {parsed.concat_method}")
print(f"CONCAT DIM === {parsed.concat_dim}")

if parsed.concat_method == "xarray-concat":
if not parsed.concat_dim:
raise ValueError(
@@ -140,9 +148,7 @@


def run_stitchee(args: list) -> None:
"""
Parse arguments and run subsetter on the specified input file
"""
"""Parse arguments and run subsetter on the specified input file."""
(
input_files,
output_path,
@@ -152,7 +158,7 @@ def run_stitchee(args: list) -> None:
concat_kwargs,
copy_input_files,
group_delimiter,
) = parse_args(args)
) = validate_parsed_args(parse_args(args))
num_inputs = len(input_files)

history_json: list[dict] = []
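
The run_stitchee.py refactor splits argument handling in two: parse_args now returns the raw argparse.Namespace, and validate_parsed_args validates it and unpacks it into the tuple that run_stitchee consumes (input files, output path, and so on). A minimal sketch of driving the two helpers directly, mirroring the composed call shown in the diff above:

# Two-step argument handling introduced by this commit: parse, then validate/unpack.
import sys

from concatenator.run_stitchee import parse_args, validate_parsed_args

namespace = parse_args(sys.argv[1:])         # argparse.Namespace
validated = validate_parsed_args(namespace)  # tuple[list[str], str, str, bool, str, dict, bool, str]
input_files, output_path = validated[0], validated[1]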
