Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Improve handling of undeploying model without redistributing remaining traffic #898

Merged
merged 6 commits into from Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 24 additions & 9 deletions google/cloud/aiplatform/models.py
Expand Up @@ -993,22 +993,23 @@ def undeploy(
sync=True,
) -> None:
"""Undeploys a deployed model.

Proportionally adjusts the traffic_split among the remaining deployed
models of the endpoint.

The model to be undeployed should have no traffic or user must provide
a new traffic_split with the remaining deployed models. Refer
to `Endpoint.traffic_split` for the current traffic split mapping.

Args:
deployed_model_id (str):
Required. The ID of the DeployedModel to be undeployed from the
Endpoint.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
Optional. A map of DeployedModel IDs to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
Required if undeploying a model with non-zero traffic from an Endpoint
with multiple deployed models. The traffic percentage values must add
up to 100, or map must be empty if the Endpoint is to not accept any traffic
at the moment. If a DeployedModel's ID is not listed in this map, then it
receives no traffic.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
Expand All @@ -1023,6 +1024,20 @@ def undeploy(
"Sum of all traffic within traffic split needs to be 100."
)

elif (
len(self.traffic_split) > 1 # Two or more models deployed to Endpoint
vinnysenthil marked this conversation as resolved.
Show resolved Hide resolved
and deployed_model_id in self._gca_resource.traffic_split
and self._gca_resource.traffic_split[deployed_model_id]
):
leftover_traffic = self._gca_resource.traffic_split[deployed_model_id]

raise ValueError(
f"Undeploying deployed model '{deployed_model_id}' would leave the remaining "
f"traffic split at {100 - leftover_traffic}%. Traffic split must add up to 100% "
f"when models are deployed. Please provide an updated traffic_split "
"or undeploy other models without traffic first."
)

self._undeploy(
deployed_model_id=deployed_model_id,
traffic_split=traffic_split,
Expand Down
67 changes: 65 additions & 2 deletions tests/unit/aiplatform/test_endpoints.py
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.
#

import copy
import pytest

from unittest import mock
Expand Down Expand Up @@ -56,8 +57,10 @@

_TEST_DISPLAY_NAME = "test-display-name"
_TEST_DISPLAY_NAME_2 = "test-display-name-2"
_TEST_DISPLAY_NAME_3 = "test-display-name-3"
_TEST_ID = "1028944691210842416"
_TEST_ID_2 = "4366591682456584192"
_TEST_ID_3 = "5820582938582924817"
_TEST_DESCRIPTION = "test-description"

_TEST_ENDPOINT_NAME = (
Expand All @@ -80,8 +83,11 @@
_TEST_DEPLOYED_MODELS = [
gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME),
gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2),
gca_endpoint.DeployedModel(id=_TEST_ID_3, display_name=_TEST_DISPLAY_NAME_3),
]

_TEST_TRAFFIC_SPLIT = {_TEST_ID: 35, _TEST_ID_2: 65, _TEST_ID_3: 0}

_TEST_MACHINE_TYPE = "n1-standard-32"
_TEST_ACCELERATOR_TYPE = "NVIDIA_TESLA_P100"
_TEST_ACCELERATOR_COUNT = 2
Expand Down Expand Up @@ -200,6 +206,7 @@ def get_endpoint_with_models_mock():
display_name=_TEST_DISPLAY_NAME,
name=_TEST_ENDPOINT_NAME,
deployed_models=_TEST_DEPLOYED_MODELS,
traffic_split=_TEST_TRAFFIC_SPLIT,
)
yield get_endpoint_mock

Expand Down Expand Up @@ -992,23 +999,79 @@ def test_undeploy_with_traffic_split(self, undeploy_model_mock, sync):
@pytest.mark.usefixtures("get_endpoint_mock")
@pytest.mark.parametrize("sync", [True, False])
def test_undeploy_raise_error_traffic_split_total(self, sync):
    """undeploy() must raise ValueError when the provided traffic_split
    does not sum to 100 (here: a single entry at 99%)."""
    # NOTE: the scraped diff retained both the pre-change line
    # (`with pytest.raises(ValueError):`) and the post-change line;
    # only the post-change form, which captures the ExceptionInfo, is kept.
    with pytest.raises(ValueError) as e:
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_endpoint.undeploy(
            deployed_model_id="model1", traffic_split={"model2": 99}, sync=sync
        )

    assert e.match("Sum of all traffic within traffic split needs to be 100.")

@pytest.mark.usefixtures("get_endpoint_mock")
@pytest.mark.parametrize("sync", [True, False])
def test_undeploy_raise_error_undeployed_model_traffic(self, sync):
    """undeploy() must raise ValueError when the model being undeployed is
    assigned non-zero traffic in the supplied traffic_split."""
    # NOTE: the scraped diff retained both the pre-change line
    # (`with pytest.raises(ValueError):`) and the post-change line;
    # only the post-change form, which captures the ExceptionInfo, is kept.
    with pytest.raises(ValueError) as e:
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_endpoint.undeploy(
            deployed_model_id="model1",
            traffic_split={"model1": 50, "model2": 50},
            sync=sync,
        )

    assert e.match("Model being undeployed should have 0 traffic.")

@pytest.mark.usefixtures("get_endpoint_with_models_mock")
@pytest.mark.parametrize("sync", [True, False])
def test_undeploy_raises_error_on_leftover_traffic(self, sync):
    """Undeploying a model that still carries traffic, without passing an
    updated traffic_split, must raise an informative ValueError."""

    undeployed_model_traffic = _TEST_TRAFFIC_SPLIT[_TEST_ID]

    # Sanity check: the fixture assigns this model non-zero traffic.
    assert undeployed_model_traffic

    with pytest.raises(ValueError) as e:
        endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        endpoint.undeploy(
            deployed_model_id=_TEST_ID, sync=sync,
        )

    assert e.match(
        f"Undeploying deployed model '{_TEST_ID}' would leave the remaining "
        f"traffic split at {100 - undeployed_model_traffic}%."
    )

@pytest.mark.usefixtures("get_endpoint_with_models_mock")
@pytest.mark.parametrize("sync", [True, False])
def test_undeploy_zero_traffic_model_without_new_traffic_split(
    self, undeploy_model_mock, sync
):
    """Undeploying a zero-traffic model with no replacement traffic_split
    must succeed without raising."""

    # Sanity check: the fixture assigns this model zero traffic.
    assert not _TEST_TRAFFIC_SPLIT[_TEST_ID_3]

    endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
    endpoint.undeploy(
        deployed_model_id=_TEST_ID_3, sync=sync,
    )

    # The undeployed model's entry is simply dropped; all other
    # entries keep their original percentages.
    expected_new_traffic_split = {
        model_id: traffic
        for model_id, traffic in _TEST_TRAFFIC_SPLIT.items()
        if model_id != _TEST_ID_3
    }

    undeploy_model_mock.assert_called_once_with(
        endpoint=endpoint.resource_name,
        deployed_model_id=_TEST_ID_3,
        traffic_split=expected_new_traffic_split,
        metadata=(),
    )

def test_predict(self, get_endpoint_mock, predict_client_predict_mock):

test_endpoint = models.Endpoint(_TEST_ID)
Expand Down