Add test for sequence state after cancellation (#7167)

* Add test for sequence state after cancellation
* Regroup infer calls
* Remove unused variable
triton-inference-server · May 7, 2024 · d21685b · d21685b
1 parent 81313ed
commit d21685b
Show file tree

Hide file tree

Showing 5 changed files with 262 additions and 1 deletion.
diff --git a/qa/L0_request_cancellation/implicit_state_model/config.pbtxt b/qa/L0_request_cancellation/implicit_state_model/config.pbtxt
@@ -0,0 +1,77 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Triton model configuration for the TorchScript model produced by
+# gen_model.py. test.sh installs it under the model name "sequence_state",
+# which is the name implicit_state_test.py sends requests to.
+# NOTE(review): the "__<n>" suffixes on tensor names look like the PyTorch
+# backend's positional naming convention and line up with the argument and
+# return order of forward() in gen_model.py - confirm against the backend docs.
+backend: "pytorch"
+max_batch_size: 1
+
+# Number of busy-loop iterations forward() runs before returning; the test
+# sets it high for the sequence it intends to cancel and to 0 otherwise.
+input {
+    name: "DELAY_ITRS__0"
+    data_type: TYPE_INT64
+    dims: [ 1 ]
+}
+# forward() returns the same value here as it writes to the output state.
+output {
+    name: "DUMMY_OUT__0"
+    data_type: TYPE_INT64
+    dims: [ 1 ]
+}
+
+sequence_batching {
+  # 6 seconds. implicit_state_test.py hard-codes the same value
+  # (sequence_timeout = 6) and sleeps past it after cancelling a sequence.
+  max_sequence_idle_microseconds: 6000000
+  # NOTE(review): presumably restricts the "oldest" strategy to a single
+  # candidate sequence so later sequences reuse the slot of the cancelled
+  # one - confirm against the sequence batcher documentation.
+  oldest { max_candidate_sequences: 1 }
+  control_input [
+    {
+      # Sequence-start flag, delivered to the model as 0 (false) or 1 (true).
+      name: "SEQ_START__1"
+      control {
+        kind: CONTROL_SEQUENCE_START
+        fp32_false_true: [ 0, 1 ]
+      }
+    },
+    {
+      # Correlation (sequence) ID of the request, delivered as INT64.
+      name: "SEQ_ID__2"
+      control {
+        kind: CONTROL_SEQUENCE_CORRID
+        data_type: TYPE_INT64
+      }
+    }
+  ]
+  # Implicit state: a single INT64 element fed to the model as
+  # SEQ_STATE_IN__3 and read back from SEQ_STATE_OUT__1.
+  state {
+    input_name: "SEQ_STATE_IN__3"
+    output_name: "SEQ_STATE_OUT__1"
+    data_type: TYPE_INT64
+    dims: 1
+    # The initial state is zero-filled.
+    initial_state {
+      name: "initial_state"
+      data_type: TYPE_INT64
+      dims: 1
+      zero_data: true
+    }
+  }
+}
+
+# One model instance, running on CPU.
+instance_group {
+  kind: KIND_CPU
+  count: 1
+}
diff --git a/qa/L0_request_cancellation/implicit_state_model/gen_model.py b/qa/L0_request_cancellation/implicit_state_model/gen_model.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import torch
+
+
+class ImplicitStateModel(torch.nn.Module):
+    """Test model that stores the sequence ID in its sequence state.
+
+    On the first request of a sequence the sequence ID is written to the
+    output state; on later requests the incoming state is passed through
+    unchanged and compared against the sequence ID. A mismatch is reported by
+    printing a "[MODEL ERROR]" line, which implicit_state_test.py looks for in
+    the server log.
+
+    The module is compiled with torch.jit.script when this file is run as a
+    script.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, delay_itrs, seq_start, seq_id, seq_state_in):
+        """Check the incoming state, spin for a while, and emit the next state.
+
+        NOTE(review): the argument order appears to correspond to the
+        DELAY_ITRS__0, SEQ_START__1, SEQ_ID__2 and SEQ_STATE_IN__3 tensors
+        declared in config.pbtxt - confirm against the PyTorch backend docs.
+
+        Args:
+            delay_itrs: Number of busy-loop iterations to run; converted with
+                int() before use.
+            seq_start: Truthy on the first request of a sequence.
+            seq_id: ID of the sequence this request belongs to.
+            seq_state_in: Sequence state handed to this request.
+
+        Returns:
+            A (dummy_out, seq_state_out) pair; both entries are the same
+            value.
+        """
+        # On any request other than the first of a sequence, the state must
+        # still equal the sequence ID that the first request stored in it.
+        if not seq_start and seq_id != seq_state_in:
+            print(
+                f"[MODEL ERROR] Invalid sequence state, expect {seq_id}, got {seq_state_in}"
+            )
+        # Busy-loop to delay the execution; the accumulated sum is not used.
+        delay = 0
+        for i in range(int(delay_itrs)):
+            delay += i
+        # Set the sequence state: only the first request of a sequence writes
+        # a new value, every other request passes the state through unchanged.
+        if seq_start:
+            seq_state_out = seq_id
+        else:
+            seq_state_out = seq_state_in
+        dummy_out = seq_state_out
+        return dummy_out, seq_state_out
+
+
+if __name__ == "__main__":
+    torch.jit.save(torch.jit.script(ImplicitStateModel()), "model.pt")
diff --git a/qa/L0_request_cancellation/implicit_state_model/model.pt b/qa/L0_request_cancellation/implicit_state_model/model.pt
diff --git a/qa/L0_request_cancellation/implicit_state_test.py b/qa/L0_request_cancellation/implicit_state_test.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import time
+import unittest
+
+import numpy as np
+import tritonclient.grpc as grpcclient
+
+
+class TestImplicitState(unittest.TestCase):
+    def _get_inputs(self, delay_itrs):
+        shape = [1, 1]
+        inputs = [grpcclient.InferInput("DELAY_ITRS__0", shape, "INT64")]
+        inputs[0].set_data_from_numpy(np.array([[delay_itrs]], np.int64))
+        return inputs
+
+    def _generate_streaming_callback_and_response_pair(self):
+        response = []  # [{"result": result, "error": error}, ...]
+
+        def callback(result, error):
+            response.append({"result": result, "error": error})
+
+        return callback, response
+
+    def _sequence_state_model_infer(self, num_reqs, seq_ids, delay_itrs, cancel_reqs):
+        model_name = "sequence_state"
+        callback, response = self._generate_streaming_callback_and_response_pair()
+        with grpcclient.InferenceServerClient("localhost:8001") as client:
+            client.start_stream(callback)
+            seq_start = True
+            for req_id in range(num_reqs):
+                for seq_id in seq_ids:
+                    client.async_stream_infer(
+                        model_name,
+                        self._get_inputs(delay_itrs),
+                        sequence_id=seq_id,
+                        sequence_start=seq_start,
+                    )
+                    time.sleep(0.1)
+                seq_start = False
+            client.stop_stream(cancel_requests=cancel_reqs)
+        return response
+
+    # Test timeout is reset for a sequence slot after its sequence is cancelled
+    def test_state_reset_after_cancel(self):
+        sequence_timeout = 6  # secs
+        # Start sequence 1 and cancel it
+        num_reqs = 10
+        response = self._sequence_state_model_infer(
+            num_reqs, seq_ids=[1], delay_itrs=5000000, cancel_reqs=True
+        )
+        self.assertLess(
+            len(response),
+            num_reqs,
+            "Precondition not met - sequence completed before cancellation",
+        )
+        # Wait for sequence 1 to timeout
+        time.sleep(sequence_timeout + 2)
+        # Start sequence 2 and 3
+        self._sequence_state_model_infer(
+            num_reqs=4, seq_ids=[2, 3], delay_itrs=0, cancel_reqs=False
+        )
+        # Check for any unexpected sequence state mixing
+        with open(os.environ["SERVER_LOG"]) as f:
+            server_log = f.read()
+        self.assertNotIn("[MODEL ERROR] Invalid sequence state", server_log)
+
+
+# Run the test cases in this module when the file is executed as a script
+# (test.sh invokes it as "python implicit_state_test.py").
+if __name__ == "__main__":
+    unittest.main()
diff --git a/qa/L0_request_cancellation/test.sh b/qa/L0_request_cancellation/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -175,6 +175,37 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+#
+# Implicit state tests
+#
+# Start from an empty model repository that holds only the "sequence_state"
+# model, assembled from the config and the pre-generated model.pt in
+# implicit_state_model/.
+rm -rf models && mkdir models
+mkdir -p models/sequence_state/1 && (cd models/sequence_state && \
+    cp ../../implicit_state_model/config.pbtxt . && \
+    cp ../../implicit_state_model/model.pt 1)
+
+TEST_LOG="implicit_state_test.log"
+SERVER_LOG="implicit_state_test.server.log"
+
+SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
+# run_server is defined outside this section; a SERVER_PID of 0 afterwards
+# is treated as a failed start.
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+# Turn off exit-on-error so a failing test run is recorded in RET instead of
+# aborting the script before the server is shut down.
+set +e
+# The test reads the server log path from the SERVER_LOG environment variable
+# to scan it for "[MODEL ERROR]" lines printed by the model.
+SERVER_LOG=$SERVER_LOG python implicit_state_test.py > $TEST_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** Implicit State Tests Failed\n***"
+    cat $TEST_LOG
+    RET=1
+fi
+set -e
+
+# Stop the server and wait for it to exit.
+kill $SERVER_PID
+wait $SERVER_PID
+
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Test Passed\n***"
 else