feat: support new protobuf value param types for Pipeline Job client #797

Merged 4 commits on Oct 28, 2021
Changes from 2 commits
54 changes: 33 additions & 21 deletions google/cloud/aiplatform/utils/pipeline_utils.py
@@ -64,9 +64,13 @@ def from_job_spec_json(
.get("inputDefinitions", {})
.get("parameters", {})
)
parameter_types = {k: v["type"] for k, v in parameter_input_definitions.items()}
# 'type' is deprecated in IR and changed to 'parameterType'.
parameter_types = {
k: v.get("type") or v.get("parameterType")
Member: nit: usually we read the latest field first, and fall back to the deprecated field if the latest one is not found.

for k, v in parameter_input_definitions.items()
}
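A minimal sketch of the ordering the review comment above suggests, reusing the names already in the diff (parameter_input_definitions, parameter_types): prefer the new parameterType key and fall back to the deprecated type key only when it is missing.

    # Suggested ordering: read the new field first, deprecated field second.
    parameter_types = {
        k: v.get("parameterType") or v.get("type")
        for k, v in parameter_input_definitions.items()
    }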

pipeline_root = runtime_config_spec.get("gcs_output_directory")
pipeline_root = runtime_config_spec.get("gcsOutputDirectory")
parameter_values = _parse_runtime_parameters(runtime_config_spec)
return cls(pipeline_root, parameter_types, parameter_values)

@@ -108,7 +112,7 @@ def build(self) -> Dict[str, Any]:
"compile time, or when calling the service."
)
return {
"gcs_output_directory": self._pipeline_root,
"gcsOutputDirectory": self._pipeline_root,
"parameters": {
k: self._get_vertex_value(k, v)
for k, v in self._parameter_values.items()
@@ -117,14 +121,14 @@
}

def _get_vertex_value(
self, name: str, value: Union[int, float, str]
self, name: str, value: Union[int, float, str, bool, list, dict]
) -> Dict[str, Any]:
"""Converts primitive values into Vertex pipeline Value proto message.

Args:
name (str):
Required. The name of the pipeline parameter.
value (Union[int, float, str]):
value (Union[int, float, str, bool, list, dict]):
Required. The value of the pipeline parameter.

Returns:
@@ -150,6 +154,16 @@ def _get_vertex_value(
result["doubleValue"] = value
elif self._parameter_types[name] == "STRING":
result["stringValue"] = value
elif self._parameter_types[name] == "BOOLEAN":
result["boolValue"] = value
elif self._parameter_types[name] == "NUMBER_DOUBLE":
result["numberValue"] = value
elif self._parameter_types[name] == "NUMBER_INTEGER":
result["numberValue"] = value
elif self._parameter_types[name] == "LIST":
result["listValue"] = value
elif self._parameter_types[name] == "STRUCT":
result["structValue"] = value
else:
raise TypeError("Got unknown type of value: {}".format(value))
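For reference, the full parameterType-to-proto-field mapping that the if/elif chain above implements, written out as a single lookup table. This is a sketch only; the table name is introduced here for illustration, and INT, DOUBLE, and STRING are the legacy IR types kept for compatibility.

    # Sketch: IR parameterType -> field set on the Vertex pipeline Value message.
    _TYPE_TO_VALUE_FIELD = {
        "INT": "intValue",        # legacy IR
        "DOUBLE": "doubleValue",  # legacy IR
        "STRING": "stringValue",
        "BOOLEAN": "boolValue",
        "NUMBER_DOUBLE": "numberValue",
        "NUMBER_INTEGER": "numberValue",
        "LIST": "listValue",
        "STRUCT": "structValue",
    }

With such a table the branch above reduces to a dictionary lookup, with a KeyError translated into the existing TypeError for unknown types.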

@@ -164,19 +178,17 @@ def _parse_runtime_parameters(
Raises:
TypeError: if the parameter type is not one of 'INT', 'DOUBLE', 'STRING'.
"""
runtime_parameters = runtime_config_spec.get("parameters")
if not runtime_parameters:
return None

result = {}
for name, value in runtime_parameters.items():
if "intValue" in value:
result[name] = int(value["intValue"])
elif "doubleValue" in value:
result[name] = float(value["doubleValue"])
elif "stringValue" in value:
result[name] = value["stringValue"]
else:
raise TypeError("Got unknown type of value: {}".format(value))

return result
# 'parameters' are deprecated in IR and changed to 'parameterValues'.
Member: nit: same here, try reading parameterValues first, then the old field as a fallback. That would be more "efficient" going forward.

if runtime_config_spec.get("parameters") is not None:
result = {}
for name, value in runtime_config_spec.get("parameters").items():
if "intValue" in value:
result[name] = int(value["intValue"])
elif "doubleValue" in value:
result[name] = float(value["doubleValue"])
elif "stringValue" in value:
result[name] = value["stringValue"]
else:
raise TypeError("Got unknown type of value: {}".format(value))
return result
return runtime_config_spec.get("parameterValues")
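A sketch of the reordered function body the review comment above asks for, assuming everything else in _parse_runtime_parameters stays as in the diff: check the new parameterValues field first and only parse the deprecated parameters field when it is absent.

    # Sketch: prefer the new 'parameterValues' field, fall back to 'parameters'.
    parameter_values = runtime_config_spec.get("parameterValues")
    if parameter_values is not None:
        return parameter_values

    runtime_parameters = runtime_config_spec.get("parameters")
    if runtime_parameters is None:
        return None

    result = {}
    for name, value in runtime_parameters.items():
        if "intValue" in value:
            result[name] = int(value["intValue"])
        elif "doubleValue" in value:
            result[name] = float(value["doubleValue"])
        elif "stringValue" in value:
            result[name] = value["stringValue"]
        else:
            raise TypeError("Got unknown type of value: {}".format(value))
    return result

For reference, the two runtime-config shapes this parser accepts (made-up values): the legacy form {"parameters": {"string_param": {"stringValue": "hello"}}} and the new form {"parameterValues": {"string_param": "hello"}}; both should come back as {"string_param": "hello"}.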
223 changes: 120 additions & 103 deletions tests/unit/aiplatform/test_pipeline_jobs.py
@@ -52,17 +52,65 @@

_TEST_PIPELINE_JOB_NAME = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/pipelineJobs/{_TEST_PIPELINE_JOB_ID}"

_TEST_PIPELINE_PARAMETER_VALUES = {"name_param": "hello"}
_TEST_PIPELINE_PARAMETER_VALUES = {"string_param": "hello"}
_TEST_PIPELINE_SPEC_LEGACY = {
"pipelineInfo": {"name": "my-pipeline"},
"root": {
"dag": {"tasks": {}},
"inputDefinitions": {"parameters": {"string_param": {"type": "STRING"}}},
},
"components": {},
}
_TEST_PIPELINE_SPEC = {
"pipelineInfo": {"name": "my-pipeline"},
"root": {
"dag": {"tasks": {}},
"inputDefinitions": {"parameters": {"name_param": {"type": "STRING"}}},
"inputDefinitions": {
"parameters": {
"string_param": {"parameterType": "STRING"},
# uncomment when GAPIC library change for protobufValue is in
# "bool_param": {
# "parameterType": "BOOLEAN"
# },
# "double_param": {
# "parameterType": "NUMBER_DOUBLE"
# },
# "int_param": {
# "parameterType": "NUMBER_INTEGER"
# },
# "list_int_param": {
# "parameterType": "LIST"
# },
# "list_string_param": {
# "parameterType": "LIST"
# },
# "struct_param": {
# "parameterType": "STRUCT"
# }
}
},
},
"components": {},
}
_TEST_PIPELINE_JOB = {

_TEST_PIPELINE_JOB_LEGACY = {
"runtimeConfig": {},
"pipelineSpec": _TEST_PIPELINE_SPEC_LEGACY,
}

_TEST_PIPELINE_JOB = {
"runtimeConfig": {
"parameterValues": {
"string_param": "lorem ipsum",
# uncomment when GAPIC library change for protobufValue is in
# "bool_param": True,
# "double_param": 12.34,
# "int_param": 5678,
# "list_int_param": [123, 456, 789],
# "list_string_param": ["lorem", "ipsum"],
# "struct_param": { "key1": 12345, "key2": 67890}
},
},
"pipelineSpec": _TEST_PIPELINE_SPEC,
}

@@ -176,23 +224,10 @@ def mock_pipeline_service_list():


@pytest.fixture
def mock_load_pipeline_job_json():
with patch.object(storage.Blob, "download_as_bytes") as mock_load_pipeline_job_json:
mock_load_pipeline_job_json.return_value = json.dumps(
_TEST_PIPELINE_JOB
).encode()
yield mock_load_pipeline_job_json


@pytest.fixture
def mock_load_pipeline_spec_json():
with patch.object(
storage.Blob, "download_as_bytes"
) as mock_load_pipeline_spec_json:
mock_load_pipeline_spec_json.return_value = json.dumps(
_TEST_PIPELINE_SPEC
).encode()
yield mock_load_pipeline_spec_json
def mock_load_json(job_spec_json):
with patch.object(storage.Blob, "download_as_bytes") as mock_load_json:
mock_load_json.return_value = json.dumps(job_spec_json).encode()
yield mock_load_json


class TestPipelineJob:
@@ -213,10 +248,23 @@ def setup_method(self):
def teardown_method(self):
initializer.global_pool.shutdown(wait=True)

@pytest.mark.usefixtures("mock_load_pipeline_job_json")
@pytest.mark.parametrize(
"job_spec_json",
[
_TEST_PIPELINE_SPEC,
_TEST_PIPELINE_JOB,
_TEST_PIPELINE_SPEC_LEGACY,
_TEST_PIPELINE_JOB_LEGACY,
],
)
@pytest.mark.parametrize("sync", [True, False])
def test_run_call_pipeline_service_pipeline_job_create(
self, mock_pipeline_service_create, mock_pipeline_service_get, sync,
def test_run_call_pipeline_service_create(
self,
mock_pipeline_service_create,
mock_pipeline_service_get,
job_spec_json,
mock_load_json,
sync,
):
aiplatform.init(
project=_TEST_PROJECT,
@@ -241,78 +289,21 @@ def test_run_call_pipeline_service_pipeline_job_create(
job.wait()

expected_runtime_config_dict = {
"gcs_output_directory": _TEST_GCS_BUCKET_NAME,
"parameters": {"name_param": {"stringValue": "hello"}},
"gcsOutputDirectory": _TEST_GCS_BUCKET_NAME,
"parameters": {"string_param": {"stringValue": "hello"}},
}
runtime_config = gca_pipeline_job_v1beta1.PipelineJob.RuntimeConfig()._pb
json_format.ParseDict(expected_runtime_config_dict, runtime_config)

# Construct expected request
expected_gapic_pipeline_job = gca_pipeline_job_v1beta1.PipelineJob(
display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
pipeline_spec={
"components": {},
"pipelineInfo": _TEST_PIPELINE_JOB["pipelineSpec"]["pipelineInfo"],
"root": _TEST_PIPELINE_JOB["pipelineSpec"]["root"],
},
runtime_config=runtime_config,
service_account=_TEST_SERVICE_ACCOUNT,
network=_TEST_NETWORK,
)

mock_pipeline_service_create.assert_called_once_with(
parent=_TEST_PARENT,
pipeline_job=expected_gapic_pipeline_job,
pipeline_job_id=_TEST_PIPELINE_JOB_ID,
)

mock_pipeline_service_get.assert_called_with(name=_TEST_PIPELINE_JOB_NAME)

assert job._gca_resource == make_pipeline_job(
gca_pipeline_state_v1beta1.PipelineState.PIPELINE_STATE_SUCCEEDED
)

@pytest.mark.usefixtures("mock_load_pipeline_spec_json")
@pytest.mark.parametrize("sync", [True, False])
def test_run_call_pipeline_service_pipeline_spec_create(
self, mock_pipeline_service_create, mock_pipeline_service_get, sync,
):
aiplatform.init(
project=_TEST_PROJECT,
staging_bucket=_TEST_GCS_BUCKET_NAME,
location=_TEST_LOCATION,
credentials=_TEST_CREDENTIALS,
)

job = pipeline_jobs.PipelineJob(
display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
template_path=_TEST_TEMPLATE_PATH,
job_id=_TEST_PIPELINE_JOB_ID,
parameter_values=_TEST_PIPELINE_PARAMETER_VALUES,
enable_caching=True,
)

job.run(
service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK, sync=sync,
)

if not sync:
job.wait()

expected_runtime_config_dict = {
"gcs_output_directory": _TEST_GCS_BUCKET_NAME,
"parameters": {"name_param": {"stringValue": "hello"}},
}
runtime_config = gca_pipeline_job_v1beta1.PipelineJob.RuntimeConfig()._pb
json_format.ParseDict(expected_runtime_config_dict, runtime_config)
pipeline_spec = job_spec_json.get("pipelineSpec") or job_spec_json

# Construct expected request
expected_gapic_pipeline_job = gca_pipeline_job_v1beta1.PipelineJob(
display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
pipeline_spec={
"components": {},
"pipelineInfo": _TEST_PIPELINE_JOB["pipelineSpec"]["pipelineInfo"],
"root": _TEST_PIPELINE_JOB["pipelineSpec"]["root"],
"pipelineInfo": pipeline_spec["pipelineInfo"],
"root": pipeline_spec["root"],
},
runtime_config=runtime_config,
service_account=_TEST_SERVICE_ACCOUNT,
@@ -340,13 +331,18 @@ def test_get_pipeline_job(self, mock_pipeline_service_get):
assert isinstance(job, pipeline_jobs.PipelineJob)

@pytest.mark.usefixtures(
"mock_pipeline_service_create",
"mock_pipeline_service_get",
"mock_load_pipeline_job_json",
"mock_pipeline_service_create", "mock_pipeline_service_get",
)
def test_cancel_pipeline_job(
self, mock_pipeline_service_cancel,
):
@pytest.mark.parametrize(
"job_spec_json",
[
_TEST_PIPELINE_SPEC,
_TEST_PIPELINE_JOB,
_TEST_PIPELINE_SPEC_LEGACY,
_TEST_PIPELINE_JOB_LEGACY,
],
)
def test_cancel_pipeline_job(self, mock_pipeline_service_cancel, mock_load_json):
aiplatform.init(
project=_TEST_PROJECT,
staging_bucket=_TEST_GCS_BUCKET_NAME,
Expand All @@ -367,11 +363,18 @@ def test_cancel_pipeline_job(
)

@pytest.mark.usefixtures(
"mock_pipeline_service_create",
"mock_pipeline_service_get",
"mock_load_pipeline_job_json",
"mock_pipeline_service_create", "mock_pipeline_service_get",
)
@pytest.mark.parametrize(
"job_spec_json",
[
_TEST_PIPELINE_SPEC,
_TEST_PIPELINE_JOB,
_TEST_PIPELINE_SPEC_LEGACY,
_TEST_PIPELINE_JOB_LEGACY,
],
)
def test_list_pipeline_job(self, mock_pipeline_service_list):
def test_list_pipeline_job(self, mock_pipeline_service_list, mock_load_json):
aiplatform.init(
project=_TEST_PROJECT,
staging_bucket=_TEST_GCS_BUCKET_NAME,
Expand All @@ -392,12 +395,19 @@ def test_list_pipeline_job(self, mock_pipeline_service_list):
)

@pytest.mark.usefixtures(
"mock_pipeline_service_create",
"mock_pipeline_service_get",
"mock_load_pipeline_job_json",
"mock_pipeline_service_create", "mock_pipeline_service_get",
)
@pytest.mark.parametrize(
"job_spec_json",
[
_TEST_PIPELINE_SPEC,
_TEST_PIPELINE_JOB,
_TEST_PIPELINE_SPEC_LEGACY,
_TEST_PIPELINE_JOB_LEGACY,
],
)
def test_cancel_pipeline_job_without_running(
self, mock_pipeline_service_cancel,
self, mock_pipeline_service_cancel, mock_load_json,
):
aiplatform.init(
project=_TEST_PROJECT,
Expand All @@ -417,12 +427,19 @@ def test_cancel_pipeline_job_without_running(
assert e.match(regexp=r"PipelineJob resource has not been created")

@pytest.mark.usefixtures(
"mock_pipeline_service_create",
"mock_pipeline_service_get_with_fail",
"mock_load_pipeline_job_json",
"mock_pipeline_service_create", "mock_pipeline_service_get_with_fail",
)
@pytest.mark.parametrize(
"job_spec_json",
[
_TEST_PIPELINE_SPEC,
_TEST_PIPELINE_JOB,
_TEST_PIPELINE_SPEC_LEGACY,
_TEST_PIPELINE_JOB_LEGACY,
],
)
@pytest.mark.parametrize("sync", [True, False])
def test_pipeline_failure_raises(self, sync):
def test_pipeline_failure_raises(self, mock_load_json, sync):
aiplatform.init(
project=_TEST_PROJECT,
staging_bucket=_TEST_GCS_BUCKET_NAME,