
Commit

Merge pull request #190 from nasa/feature/issue-134-expand-tests-with-output-from-subsetter

Feature/issue 134 expand tests with output from subsetter
danielfromearth committed May 15, 2024
2 parents fc254d0 + 2326fb3 commit 6e2f8f3
Showing 19 changed files with 389 additions and 184 deletions.
5 changes: 5 additions & 0 deletions .codecov.yml
@@ -0,0 +1,5 @@
flags:
unittests:
carryforward: false
integration:
carryforward: false
64 changes: 64 additions & 0 deletions .github/workflows/integration-test.yml
@@ -0,0 +1,64 @@
name: Integration Tests

on:
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
secrets:
DEK_EDL_USER:
required: true
DEK_EDL_PASSWORD:
required: true
codecov_token:
required: true
push:
branches:
- main
- develop
- release/**
- feature/**

# When this workflow is queued, automatically cancel any previous running
# or pending jobs from the same branch
concurrency:
group: integration-tests-${{ github.ref }}
cancel-in-progress: true

env:
POETRY_VERSION: "1.3.2"
PYTHON_VERSION: "3.10"

jobs:
integration-tests:
runs-on: ubuntu-latest

steps:
- name: Retrieve repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}

- name: Set up Poetry
uses: abatilo/actions-poetry@v3.0.0
with:
poetry-version: ${{ env.POETRY_VERSION }}

- name: Install package
run: poetry install --with=integration --without harmony

- name: Test
env:
EDL_USER: ${{ secrets.DEK_EDL_USER }}
EDL_PASSWORD: ${{ secrets.DEK_EDL_PASSWORD }}
run: |
scripts/create-netrc
poetry run pytest --cov=concatenator --cov-report=xml tests/integration
- name: Upload coverage
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
flags: integration
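
The Test step in this workflow calls a repository helper, scripts/create-netrc, to turn the DEK_EDL_USER / DEK_EDL_PASSWORD secrets (exposed to the step as EDL_USER and EDL_PASSWORD) into a ~/.netrc entry that Earthdata Login clients can read. The helper itself is not part of this diff; the snippet below is a minimal Python sketch of what such a script typically does, with the urs.earthdata.nasa.gov hostname assumed rather than confirmed.

# Hypothetical sketch of scripts/create-netrc (the real script is not shown in this diff):
# write Earthdata Login credentials from the environment into ~/.netrc so the
# integration tests can authenticate.
import os
import stat
from pathlib import Path

netrc_path = Path.home() / ".netrc"
netrc_path.write_text(
    "machine urs.earthdata.nasa.gov\n"
    f"login {os.environ['EDL_USER']}\n"
    f"password {os.environ['EDL_PASSWORD']}\n"
)
# Most netrc readers refuse credential files that other users can read.
netrc_path.chmod(stat.S_IRUSR | stat.S_IWUSR)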
11 changes: 6 additions & 5 deletions .github/workflows/run_tests.yml
@@ -1,9 +1,10 @@
# A reusable workflow to build and run the unit test suite
#
# This workflow will install Python dependencies, run tests,
# and report test results and code coverage as artifacts. It will
# be called by the workflow that run tests against new PRs and as
# a first step in the workflow that publishes new Docker images.

name: A reusable workflow to build and run the unit test suite
name: Unit Tests

on:
workflow_call:
@@ -17,7 +18,7 @@ env:
PYTHON_VERSION: "3.10"

jobs:
build_and_test:
unit-tests:
runs-on: ubuntu-latest

steps:
@@ -42,11 +43,11 @@ jobs:
poetry run ruff check concatenator
- name: Run tests and collect coverage
run: poetry run pytest --cov=concatenator tests/unit/test_dataset_and_group_handling.py --cov-report=xml
# TODO: expand tests to include full concatenation runs, i.e., not only test_dataset_and_group_handling.py
run: poetry run pytest --cov=concatenator --cov-report=xml tests/unit

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
flags: unittests
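
For local debugging, the revised unit-test command (now covering all of tests/unit rather than a single module) can be reproduced through pytest's Python API. This is a sketch that assumes the project's test dependencies (pytest, pytest-cov) are installed, e.g. via poetry install.

# Run the same unit-test command the workflow now uses, from Python instead of the shell.
import pytest

exit_code = pytest.main(["--cov=concatenator", "--cov-report=xml", "tests/unit"])
raise SystemExit(exit_code)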
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [Issue #133](https://github.com/nasa/stitchee/issues/133): Add readthedocs documentation build
- [Issue #185](https://github.com/nasa/stitchee/issues/185): Added arguments for temporary file copies and overwriting output file in main stitchee function
- [Issue #181](https://github.com/nasa/stitchee/issues/181): Add a group delimiter argument
- [Issue #134](https://github.com/nasa/stitchee/issues/134): Add an integration test that runs stitchee on files first subsetted by the operational Harmony subsetter
### Changed
### Deprecated
### Removed
109 changes: 0 additions & 109 deletions concatenator/attribute_handling.py
@@ -11,7 +11,6 @@

import importlib_metadata
import netCDF4
import xarray as xr

import concatenator

@@ -96,114 +95,6 @@ def _flatten_coordinate_attribute(attribute_string: str) -> str:
)


def create_new_attributes(input_dataset: xr.Dataset, request_parameters: dict) -> dict:
"""Set the global attributes of the merged output file.
These begin as the global attributes of the input granule, but are updated to also include
the provenance data via an updated `history` CF attribute (or `History`
if that is already present), and a `history_json` attribute that is
compliant with the schema defined at the URL specified by
`HISTORY_JSON_SCHEMA`.
`projection` is not included in the output parameters, as this is not
an original message parameter. It is a derived `pyproj.Proj` instance
that is defined by the input `crs` parameter.
`x_extent` and `y_extent` are not serializable, and are instead
included by `x_min`, `x_max` and `y_min` `y_max` accordingly.
Parameters
----------
input_dataset : Dataset
request_parameters : dict
"""
# Get attributes from input file
output_attributes = input_dataset.attrs

# Reconstruct parameters' dictionary with only keys that correspond to non-null values.
valid_request_parameters = {
parameter_name: parameter_value
for parameter_name, parameter_value in request_parameters.items()
if parameter_value is not None
}

# Remove unnecessary and unserializable request parameters
for surplus_key in ["projection", "x_extent", "y_extent"]:
valid_request_parameters.pop(surplus_key, None)

# Retrieve `granule_url` and replace the `input_file` attribute.
# This ensures `history_json` refers to the archived granule location, rather
# than a temporary file in the Docker container.
valid_request_parameters["input_file"] = valid_request_parameters.pop("granule_url", None)

# Preferentially use `history`, unless `History` is already present in the
# input file.
cf_att_name = "History" if hasattr(input_dataset, "History") else "history"
input_history = getattr(input_dataset, cf_att_name, None)

# Create new history_json attribute
new_history_json_record = create_history_record(str(input_history), valid_request_parameters)

# Extract existing `history_json` from input granule
if hasattr(input_dataset, "history_json"):
old_history_json = json.loads(output_attributes["history_json"])
if isinstance(old_history_json, list):
output_history_json = old_history_json
else:
# Single `history_record` element.
output_history_json = [old_history_json]
else:
output_history_json = []

# Append `history_record` to the existing `history_json` array:
output_history_json.append(new_history_json_record)
output_attributes["history_json"] = json.dumps(output_history_json)

# Create history attribute
history_parameters = {
parameter_name: parameter_value
for parameter_name, parameter_value in new_history_json_record["parameters"].items()
if parameter_name != "input_file"
}

new_history_line = " ".join(
[
new_history_json_record["date_time"],
new_history_json_record["program"],
new_history_json_record["version"],
json.dumps(history_parameters),
]
)

output_history = "\n".join(filter(None, [input_history, new_history_line]))
output_attributes[cf_att_name] = output_history

return output_attributes


def create_history_record(input_history: str, request_parameters: dict) -> dict:
"""Create a serializable dictionary for the `history_json` global
attribute in the merged output NetCDF-4 file.
"""
history_record = {
"$schema": HISTORY_JSON_SCHEMA,
"date_time": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
"program": PROGRAM,
"version": VERSION,
"parameters": request_parameters,
"derived_from": request_parameters["input_file"],
"program_ref": PROGRAM_REF,
}

if isinstance(input_history, str):
history_record["cf_history"] = input_history.split("\n")
elif isinstance(input_history, list):
history_record["cf_history"] = input_history

return history_record


def retrieve_history(dataset: netCDF4.Dataset) -> dict:
"""
Retrieve history_json field from NetCDF dataset, if it exists
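
retrieve_history is retained by this cleanup (only create_new_attributes and create_history_record are removed). As a usage illustration, it takes an open netCDF4.Dataset and returns the parsed history_json contents when present; the file name below is a placeholder, not something from this commit.

# Hypothetical usage of the retained retrieve_history helper.
import netCDF4
from concatenator.attribute_handling import retrieve_history

with netCDF4.Dataset("merged_output.nc", mode="r") as dataset:
    history = retrieve_history(dataset)
    print(history)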
22 changes: 14 additions & 8 deletions concatenator/run_stitchee.py
@@ -1,5 +1,6 @@
"""A simple CLI wrapper around the main concatenation process."""

import argparse
import json
import logging
import sys
@@ -12,13 +13,12 @@
from concatenator.stitchee import stitchee


def parse_args(args: list) -> tuple[list[str], str, str, bool, str, dict, bool, str]:
"""
Parse args for this script.
def parse_args(args: list) -> argparse.Namespace:
"""Parse args for this script.
Returns
-------
tuple
argparse.Namespace
"""
parser = ArgumentParser(
prog="stitchee", description="Run the along-existing-dimension concatenator."
@@ -98,6 +98,13 @@ def parse_args(args: list) -> tuple[list[str], str, str, bool, str, dict, bool,

parsed = parser.parse_args(args)

return parsed


def validate_parsed_args(
parsed: argparse.Namespace,
) -> tuple[list[str], str, str, bool, str, dict, bool, str]:
"""Perform preliminary validation of the parsed arguments and return them as a tuple."""
if parsed.verbose:
logging.basicConfig(level=logging.DEBUG)

@@ -107,6 +114,7 @@ def parse_args(args: list) -> tuple[list[str], str, str, bool, str, dict, bool,

print(f"CONCAT METHOD === {parsed.concat_method}")
print(f"CONCAT DIM === {parsed.concat_dim}")

if parsed.concat_method == "xarray-concat":
if not parsed.concat_dim:
raise ValueError(
@@ -140,9 +148,7 @@


def run_stitchee(args: list) -> None:
"""
Parse arguments and run subsetter on the specified input file
"""
"""Parse arguments and run subsetter on the specified input file."""
(
input_files,
output_path,
@@ -152,7 +158,7 @@ def run_stitchee(args: list) -> None:
concat_kwargs,
copy_input_files,
group_delimiter,
) = parse_args(args)
) = validate_parsed_args(parse_args(args))
num_inputs = len(input_files)

history_json: list[dict] = []
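
The run_stitchee.py refactor splits argument handling in two: parse_args now returns the raw argparse.Namespace, and validate_parsed_args validates it and unpacks it into the tuple that run_stitchee consumes (input files, output path, and so on). A minimal sketch of driving the two helpers directly, mirroring the composed call shown in the diff above:

# Two-step argument handling introduced by this commit: parse, then validate/unpack.
import sys

from concatenator.run_stitchee import parse_args, validate_parsed_args

namespace = parse_args(sys.argv[1:])         # argparse.Namespace
validated = validate_parsed_args(namespace)  # tuple[list[str], str, str, bool, str, dict, bool, str]
input_files, output_path = validated[0], validated[1]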
