From f43cedd2b4fb8859282a5cabf5be83f4d61c4f96 Mon Sep 17 00:00:00 2001 From: Kris Hung Date: Tue, 18 Jun 2024 14:24:26 -0700 Subject: [PATCH 1/2] fix: Fix version for setuptools and grpcio-tools. Remove cudnn 8 installation (#7331) --- build.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/build.py b/build.py index 5f7200d2b6c..d2d2362e1f4 100755 --- a/build.py +++ b/build.py @@ -1082,9 +1082,9 @@ def create_dockerfile_linux( """ if "tensorrtllm" in backends: df += """ -# Remove TRT contents that are not needed in runtime -RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig +RUN ldconfig +# Remove contents that are not needed in runtime RUN ARCH="$(uname -i)" \\ && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\ && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\ @@ -1094,10 +1094,14 @@ def create_dockerfile_linux( RUN python3 -m pip install --upgrade pip \\ && pip3 install transformers -# Install TensorRT-LLM +# ldconfig for TRT-LLM RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf +# Setuptools has breaking changes in version 70.0.0, so fix it to 69.5.1 +# The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0 +RUN pip3 install setuptools==69.5.1 grpcio-tools==1.64.0 + ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH """ with open(os.path.join(ddir, dockerfile_name), "w") as dfile: From d5eb4676d2857f1fdc5dfd44b3f0384c11bbe184 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Thu, 20 Jun 2024 19:30:06 +0530 Subject: [PATCH 2/2] ci: Add INT64 Datatype Support for Shape Tensors in TensorRT Backend (#7329) --- qa/L0_model_config/test.sh | 2 +- qa/L0_perf_analyzer/test.sh | 4 +- qa/L0_perf_analyzer_capi/test.sh | 6 +- qa/L0_trt_shape_tensors/test.sh | 21 +- .../trt_shape_tensor_test.py | 1377 +++++++++-------- qa/common/gen_common.py | 4 +- qa/common/gen_qa_dyna_sequence_models.py | 94 +- qa/common/gen_qa_identity_models.py | 69 +- qa/common/gen_qa_sequence_models.py | 47 +- qa/common/infer_util.py | 59 +- qa/common/sequence_util.py | 55 +- 11 files changed, 1019 insertions(+), 719 deletions(-) diff --git a/qa/L0_model_config/test.sh b/qa/L0_model_config/test.sh index e36cbaedb3a..9220c4eafcf 100755 --- a/qa/L0_model_config/test.sh +++ b/qa/L0_model_config/test.sh @@ -88,7 +88,7 @@ for modelpath in \ autofill_noplatform/tensorrt/mixed_batch_hint_shape_values/1 \ autofill_noplatform_success/tensorrt/no_config_shape_tensor/1 ; do mkdir -p $modelpath - cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32/1/model.plan \ + cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32/1/model.plan \ $modelpath/. 
done diff --git a/qa/L0_perf_analyzer/test.sh b/qa/L0_perf_analyzer/test.sh index f2807824b97..49c7e72e481 100755 --- a/qa/L0_perf_analyzer/test.sh +++ b/qa/L0_perf_analyzer/test.sh @@ -93,7 +93,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/ # Copy shape tensor models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/ +cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/ # Copying ensemble including a sequential model cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR @@ -564,7 +564,7 @@ for PROTOCOL in grpc http; do # Shape tensor I/O model (server needs the shape tensor on the CPU) for SHARED_MEMORY_TYPE in none system; do set +e - $PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \ + $PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \ --shape DUMMY_INPUT0:4,4 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -b 8 -s ${STABILITY_THRESHOLD} \ >$CLIENT_LOG 2>&1 if [ $? -ne 0 ]; then diff --git a/qa/L0_perf_analyzer_capi/test.sh b/qa/L0_perf_analyzer_capi/test.sh index f9fa3c078e2..d031e2cacf4 100755 --- a/qa/L0_perf_analyzer_capi/test.sh +++ b/qa/L0_perf_analyzer_capi/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -73,7 +73,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/ # Copy shape tensor models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/ +cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/ # Copying ensemble including a sequential model cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR @@ -201,7 +201,7 @@ if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then fi # Shape tensor I/O model (server needs the shape tensor on the CPU) -$PERF_ANALYZER -v -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \ +$PERF_ANALYZER -v -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \ --shape DUMMY_INPUT0:4,4 -p2000 -b 8 \ --service-kind=triton_c_api --model-repository=$DATADIR \ --triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ diff --git a/qa/L0_trt_shape_tensors/test.sh b/qa/L0_trt_shape_tensors/test.sh index eed67d9dcb5..f08ed339b05 100755 --- a/qa/L0_trt_shape_tensors/test.sh +++ b/qa/L0_trt_shape_tensors/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -127,10 +127,13 @@ else fi # Prepare the config file for dynamic batching tests -CONFIG_FILE="models/plan_zero_1_float32/config.pbtxt" -sed -i "s/^max_batch_size:.*/max_batch_size: 8/" $CONFIG_FILE && \ -sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" $CONFIG_FILE && \ - echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> $CONFIG_FILE +for dtype in int32 int64; do + CONFIG_FILE="models/plan_zero_1_float32_${dtype}/config.pbtxt" + sed -i "s/^max_batch_size:.*/max_batch_size: 8/" "$CONFIG_FILE" + sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" "$CONFIG_FILE" + echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >>"$CONFIG_FILE" +done + for i in \ test_dynamic_different_shape_values \ test_dynamic_identical_shape_values; do @@ -202,9 +205,11 @@ for i in \ done # Prepare the config file for dynamic sequence batching tests -CONFIG_FILE="models/plan_dyna_sequence_float32/config.pbtxt" -sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" $CONFIG_FILE && \ -sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" $CONFIG_FILE +for dtype in int32 int64; do + CONFIG_FILE="models/plan_dyna_sequence_float32_${dtype}/config.pbtxt" + sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" "$CONFIG_FILE" + sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" "$CONFIG_FILE" +done export NO_BATCHING=0 diff --git a/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py b/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py index d3563dce9e1..551ee2f8c0d 100755 --- a/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py +++ b/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -84,6 +84,7 @@ def check_response( shm_region_names=None, precreated_shm_regions=None, shm_suffix="", + shape_tensor_input_dtype=np.int32, ): try: # Add batch size to shape as full shape is expected @@ -104,6 +105,7 @@ def check_response( shm_suffix=shm_suffix, use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, batch_size=bs, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) end_ms = int(round(time.time() * 1000)) @@ -222,80 +224,90 @@ def check_status(self, model_name, batch_exec, exec_cnt, infer_cnt): ) def test_static_batch(self): - iu.infer_shape_tensor( - self, - "plan", - np.float32, - [[32, 32]], - [[8, 4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - batch_size=8, - ) - iu.infer_shape_tensor( - self, - "plan", - np.float32, - [[4, 4]], - [[8, 32, 32]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - batch_size=8, - ) - iu.infer_shape_tensor( - self, - "plan", - np.float32, - [[4, 4]], - [[8, 4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - batch_size=8, - ) - - def test_nobatch(self): - iu.infer_shape_tensor( - self, - "plan_nobatch", - np.float32, - [[32, 32]], - [[4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - ) - iu.infer_shape_tensor( - self, - "plan_nobatch", - np.float32, - [[4, 4]], - [[32, 32]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - ) - iu.infer_shape_tensor( - self, - "plan_nobatch", - np.float32, - [[4, 4]], - [[4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - ) - - def test_wrong_shape_values(self): - over_shape_values = [[32, 33]] - try: + for shape_tensor_input_dtype in [np.int32, np.int64]: iu.infer_shape_tensor( self, "plan", np.float32, - over_shape_values, + [[32, 32]], [[8, 4, 4]], use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) - # InferenceServerException will be raised from different namespace, - # use dynamic type characteristic to catch both ex - except Exception as ex: - self.assertTrue( - "The shape value at index 2 is expected to be in range from 1 to 32, Got: 33" - in ex.message() + iu.infer_shape_tensor( + self, + "plan", + np.float32, + [[4, 4]], + [[8, 32, 32]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + iu.infer_shape_tensor( + self, + "plan", + np.float32, + [[4, 4]], + [[8, 4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + + def test_nobatch(self): + for shape_tensor_input_dtype in [np.int32, np.int64]: + iu.infer_shape_tensor( + self, + "plan_nobatch", + np.float32, + [[32, 32]], + [[4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + iu.infer_shape_tensor( + self, + "plan_nobatch", + np.float32, + [[4, 4]], + [[32, 32]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + iu.infer_shape_tensor( + self, + "plan_nobatch", + np.float32, + [[4, 4]], + [[4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) + def test_wrong_shape_values(self): + over_shape_values = [[32, 33]] + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + iu.infer_shape_tensor( + self, + "plan", + 
np.float32, + over_shape_values, + [[8, 4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + # InferenceServerException will be raised from different namespace, + # use dynamic type characteristic to catch both ex + except Exception as ex: + self.assertIn( + "The shape value at index 2 is expected to be in range from 1 to 32, Got: 33", + ex.message(), + ) + # Dynamic Batcher tests def test_dynamic_different_shape_values(self): # Send two requests with sum of static batch sizes == @@ -303,86 +315,96 @@ def test_dynamic_different_shape_values(self): # should cause the requests to not be batched. The first # response will come back immediately and the second # delayed by the max batch queue delay - try: - model_name = tu.get_zero_model_name("plan", 1, np.float32) - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - - threads = [] - threads.append( - threading.Thread( - target=self.check_response, - args=(3, (6000, None)), - kwargs={ - "shape_values": [[2, 2]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + model_name = tu.get_zero_model_name("plan", 1, np.float32) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + + threads = [] + threads.append( + threading.Thread( + target=self.check_response, + args=(3, (6000, None)), + kwargs={ + "shape_values": [[2, 2]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_response, - args=(3, (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)), - kwargs={ - "shape_values": [[4, 4]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + threads.append( + threading.Thread( + target=self.check_response, + args=(3, (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)), + kwargs={ + "shape_values": [[4, 4]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads[0].start() - time.sleep(1) - threads[1].start() - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {3: 2}, 2, 6) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) + threads[0].start() + time.sleep(1) + threads[1].start() + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {3: 2}, 2, 6) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) def test_dynamic_identical_shape_values(self): # Send two requests with sum of static batch sizes == # preferred size, but with identical shape values. This # should cause the requests to get batched. Both # responses should come back immediately. 
- try: - model_name = tu.get_zero_model_name("plan", 1, np.float32) - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - - threads = [] - threads.append( - threading.Thread( - target=self.check_response, - args=(4, (6000, None)), - kwargs={ - "shape_values": [[4, 4]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + model_name = tu.get_zero_model_name("plan", 1, np.float32) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + + threads = [] + threads.append( + threading.Thread( + target=self.check_response, + args=(4, (6000, None)), + kwargs={ + "shape_values": [[4, 4]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_response, - args=(2, (6000, None)), - kwargs={ - "shape_values": [[4, 4]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + threads.append( + threading.Thread( + target=self.check_response, + args=(2, (6000, None)), + kwargs={ + "shape_values": [[4, 4]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads[0].start() - time.sleep(1) - threads[1].start() - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {6: 1}, 1, 6) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) + threads[0].start() + time.sleep(1) + threads[1].start() + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {6: 1}, 1, 6) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) class SequenceBatcherShapeTensorTest(su.SequenceBatcherTestUtil): @@ -401,124 +423,152 @@ def test_sequence_identical_shape_values(self): # inferences. self.clear_deferred_exceptions() dtype = np.float32 - try: - model_name = tu.get_sequence_model_name("plan", dtype) - self.check_setup(model_name) - - # Need scheduler to wait for queue to contain all - # inferences for both sequences. 
- self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) - self.assertTrue("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0) - precreated_shm0_handles = self.precreate_register_shape_tensor_regions( - ((2, 1), (4, 2), (8, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_shape_tensor_regions( - ((2, 11), (4, 12), (8, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_shape_tensor_regions( - ((2, 111), (4, 112), (8, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_shape_tensor_regions( - ((2, 1111), (4, 1112), (8, 1113)), dtype, 3 - ) - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1001, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1, None), - (None, 4, 2, None), - ("end", 8, 3, None), + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + model_name = tu.get_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + + # Need scheduler to wait for queue to contain all + # inferences for both sequences. + self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) + self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + self.assertEqual( + int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0 + ) + precreated_shm0_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 1), (4, 2), (8, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm1_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 11), (4, 12), (8, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm2_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 111), (4, 112), (8, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm3_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 1111), (4, 1112), (8, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1001, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1, None), + (None, 4, 2, None), + ("end", 8, 3, None), + ), + self.get_expected_result(6, 3, "end"), + precreated_shm0_handles, ), - self.get_expected_result(6, 3, "end"), - precreated_shm0_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1002, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 11, None), - (None, 4, 12, None), - ("end", 8, 13, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1002, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 11, None), + (None, 4, 12, 
None), + ("end", 8, 13, None), + ), + self.get_expected_result(36, 13, "end"), + precreated_shm1_handles, ), - self.get_expected_result(36, 13, "end"), - precreated_shm1_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1003, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 111, None), - (None, 4, 112, None), - ("end", 8, 113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1003, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 111, None), + (None, 4, 112, None), + ("end", 8, 113, None), + ), + self.get_expected_result(336, 113, "end"), + precreated_shm2_handles, ), - self.get_expected_result(336, 113, "end"), - precreated_shm2_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1004, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1111, None), - (None, 4, 1112, None), - ("end", 8, 1113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1004, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1111, None), + (None, 4, 1112, None), + ("end", 8, 1113, None), + ), + self.get_expected_result(3336, 1113, "end"), + precreated_shm3_handles, ), - self.get_expected_result(3336, 1113, "end"), - precreated_shm3_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {4: 3}, 3, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + for t in threads: + t.start() + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {4: 3}, 3, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) def test_sequence_different_shape_values(self): # Test model instances together are configured with @@ -530,127 +580,155 @@ def test_sequence_different_shape_values(self): self.clear_deferred_exceptions() dtype = np.float32 - precreated_shm0_handles = self.precreate_register_shape_tensor_regions( - ((1, 1), (1, 2), (1, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_shape_tensor_regions( 
- ((32, 11), (32, 12), (32, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_shape_tensor_regions( - ((16, 111), (16, 112), (16, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_shape_tensor_regions( - ((1, 1111), (1, 1112), (1, 1113)), dtype, 3 - ) - try: - model_name = tu.get_sequence_model_name("plan", dtype) - self.check_setup(model_name) - - # Need scheduler to wait for queue to contain all - # inferences for both sequences. - self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) - self.assertTrue("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0) - - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1001, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 1, 1, None), - (None, 1, 2, None), - ("end", 1, 3, None), + for shape_tensor_input_dtype in [np.int32, np.int64]: + precreated_shm0_handles = self.precreate_register_shape_tensor_regions( + value_list=((1, 1), (1, 2), (1, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm1_handles = self.precreate_register_shape_tensor_regions( + value_list=((32, 11), (32, 12), (32, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm2_handles = self.precreate_register_shape_tensor_regions( + value_list=((16, 111), (16, 112), (16, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm3_handles = self.precreate_register_shape_tensor_regions( + value_list=((1, 1111), (1, 1112), (1, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + try: + model_name = tu.get_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + + # Need scheduler to wait for queue to contain all + # inferences for both sequences. 
+ self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) + self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + self.assertEqual( + int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0 + ) + + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1001, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 1, 1, None), + (None, 1, 2, None), + ("end", 1, 3, None), + ), + self.get_expected_result(6, 3, "end"), + precreated_shm0_handles, ), - self.get_expected_result(6, 3, "end"), - precreated_shm0_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1002, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 32, 11, None), - (None, 32, 12, None), - ("end", 32, 13, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1002, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 32, 11, None), + (None, 32, 12, None), + ("end", 32, 13, None), + ), + self.get_expected_result(36, 13, "end"), + precreated_shm1_handles, ), - self.get_expected_result(36, 13, "end"), - precreated_shm1_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1003, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 16, 111, None), - (None, 16, 112, None), - ("end", 16, 113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1003, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 16, 111, None), + (None, 16, 112, None), + ("end", 16, 113, None), + ), + self.get_expected_result(336, 113, "end"), + precreated_shm2_handles, ), - self.get_expected_result(336, 113, "end"), - precreated_shm2_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1004, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 1, 1111, None), - (None, 1, 1112, None), - ("end", 1, 1113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1004, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 1, 1111, None), + (None, 1, 1112, None), + ("end", 1, 1113, None), + ), + self.get_expected_result(3336, 1113, "end"), + precreated_shm3_handles, ), - self.get_expected_result(3336, 1113, "end"), - precreated_shm3_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + 
"shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - time.sleep(1) - for t in threads: - t.join() + for t in threads: + t.start() + time.sleep(1) + for t in threads: + t.join() - self.check_deferred_exception() - self.check_status(model_name, {4: 3, 3: 6}, 9, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + self.check_deferred_exception() + self.check_status(model_name, {4: 3, 3: 6}, 9, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) class DynaSequenceBatcherTest(su.SequenceBatcherTestUtil): @@ -667,300 +745,355 @@ def _multi_sequence_different_shape_impl(self, sleep_secs): self.clear_deferred_exceptions() dtype = np.float32 - precreated_shm0_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((1, 1), (12, 2), (2, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((3, 11), (4, 12), (5, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((6, 111), (7, 112), (8, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((9, 1111), (10, 1112), (11, 1113)), dtype, 3 - ) - - try: - model_name = tu.get_dyna_sequence_model_name("plan", dtype) - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertFalse("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - - corrids = [1001, 1002, 1003, 1004] - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[0], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 1, 1, None), - (None, 12, 2, None), - ("end", 2, 3, None), - ), - self.get_expected_result(4 + corrids[0], corrids[0], 3, "end"), - precreated_shm0_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[0] - ), - "using_dynamic_batcher": True, - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + precreated_shm0_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((1, 1), (12, 2), (2, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[1], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 3, 11, None), - (None, 4, 12, None), - ("end", 5, 13, None), - ), - self.get_expected_result( - 36 + corrids[1], corrids[1], 13, "end" - ), - precreated_shm1_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[1] - ), - "using_dynamic_batcher": True, - }, + precreated_shm1_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((3, 11), (4, 12), (5, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - 
threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[2], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 6, 111, None), - (None, 7, 112, None), - ("end", 8, 113, None), - ), - self.get_expected_result( - 336 + corrids[2], corrids[2], 113, "end" - ), - precreated_shm2_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[2] - ), - "using_dynamic_batcher": True, - }, + precreated_shm2_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((6, 111), (7, 112), (8, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + ) + precreated_shm3_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((9, 1111), (10, 1112), (11, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[3], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 9, 1111, None), - (None, 10, 1112, None), - ("end", 11, 1113, None), + + try: + model_name = tu.get_dyna_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + + corrids = [1001, 1002, 1003, 1004] + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[0], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 1, 1, None), + (None, 12, 2, None), + ("end", 2, 3, None), + ), + self.get_expected_result( + 4 + corrids[0], corrids[0], 3, "end" + ), + precreated_shm0_handles, ), - self.get_expected_result( - 3336 + corrids[3], corrids[3], 1113, "end" + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[0] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[1], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 3, 11, None), + (None, 4, 12, None), + ("end", 5, 13, None), + ), + self.get_expected_result( + 36 + corrids[1], corrids[1], 13, "end" + ), + precreated_shm1_handles, ), - precreated_shm3_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[3] + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[1] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[2], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 6, 111, None), + (None, 7, 112, None), + ("end", 8, 113, None), + ), + self.get_expected_result( + 336 + corrids[2], corrids[2], 113, "end" + ), + precreated_shm2_handles, ), - "using_dynamic_batcher": True, - }, + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[2] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + 
target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[3], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 9, 1111, None), + (None, 10, 1112, None), + ("end", 11, 1113, None), + ), + self.get_expected_result( + 3336 + corrids[3], corrids[3], 1113, "end" + ), + precreated_shm3_handles, + ), + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[3] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - if sleep_secs > 0: - time.sleep(sleep_secs) - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {1: 12}, 12, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + for t in threads: + t.start() + if sleep_secs > 0: + time.sleep(sleep_secs) + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {1: 12}, 12, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) def _multi_sequence_identical_shape_impl(self, sleep_secs): self.clear_deferred_exceptions() dtype = np.float32 - precreated_shm0_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 1), (4, 2), (8, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 11), (4, 12), (8, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 111), (4, 112), (8, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 1111), (4, 1112), (8, 1113)), dtype, 3 - ) - - try: - model_name = tu.get_dyna_sequence_model_name("plan", dtype) - - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertFalse("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - - corrids = [1001, 1002, 1003, 1004] - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[0], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1, None), - (None, 4, 2, None), - ("end", 8, 3, None), - ), - self.get_expected_result(4 + corrids[0], corrids[0], 3, "end"), - precreated_shm0_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[0] - ), - "using_dynamic_batcher": True, - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + precreated_shm0_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 1), (4, 2), (8, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[1], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 11, None), - (None, 4, 12, None), - ("end", 8, 13, None), - ), - 
self.get_expected_result( - 36 + corrids[1], corrids[1], 13, "end" - ), - precreated_shm1_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[1] - ), - "using_dynamic_batcher": True, - }, + precreated_shm1_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 11), (4, 12), (8, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[2], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 111, None), - (None, 4, 112, None), - ("end", 8, 113, None), - ), - self.get_expected_result( - 336 + corrids[2], corrids[2], 113, "end" - ), - precreated_shm2_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[2] - ), - "using_dynamic_batcher": True, - }, + precreated_shm2_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 111), (4, 112), (8, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[3], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1111, None), - (None, 4, 1112, None), - ("end", 8, 1113, None), + precreated_shm3_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 1111), (4, 1112), (8, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + ) + + try: + model_name = tu.get_dyna_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + + corrids = [1001, 1002, 1003, 1004] + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[0], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1, None), + (None, 4, 2, None), + ("end", 8, 3, None), + ), + self.get_expected_result( + 4 + corrids[0], corrids[0], 3, "end" + ), + precreated_shm0_handles, ), - self.get_expected_result( - 3336 + corrids[3], corrids[3], 1113, "end" + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[0] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[1], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 11, None), + (None, 4, 12, None), + ("end", 8, 13, None), + ), + self.get_expected_result( + 36 + corrids[1], corrids[1], 13, "end" + ), + precreated_shm1_handles, ), - precreated_shm3_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[3] + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[1] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[2], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 111, 
None), + (None, 4, 112, None), + ("end", 8, 113, None), + ), + self.get_expected_result( + 336 + corrids[2], corrids[2], 113, "end" + ), + precreated_shm2_handles, ), - "using_dynamic_batcher": True, - }, + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[2] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[3], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1111, None), + (None, 4, 1112, None), + ("end", 8, 1113, None), + ), + self.get_expected_result( + 3336 + corrids[3], corrids[3], 1113, "end" + ), + precreated_shm3_handles, + ), + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[3] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - if sleep_secs > 0: - time.sleep(sleep_secs) - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {4: 3}, 3, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + for t in threads: + t.start() + if sleep_secs > 0: + time.sleep(sleep_secs) + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {4: 3}, 3, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) def test_dynaseq_identical_shape_values_series(self): # Send four sequences with identical shape values in series diff --git a/qa/common/gen_common.py b/qa/common/gen_common.py index 5bb751f3c8a..417ad9477ae 100644 --- a/qa/common/gen_common.py +++ b/qa/common/gen_common.py @@ -1,4 +1,4 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -101,6 +101,8 @@ def np_to_trt_dtype(np_dtype): return trt.int8 elif np_dtype == np.int32: return trt.int32 + elif np_dtype == np.int64: + return trt.int64 elif np_dtype == np.uint8: return trt.uint8 elif np_dtype == np.float16: diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py index 89d20df2e3d..bcb5a3a2b44 100755 --- a/qa/common/gen_qa_dyna_sequence_models.py +++ b/qa/common/gen_qa_dyna_sequence_models.py @@ -291,9 +291,11 @@ def create_tf_modelconfig( model_name, "tensorflow_savedmodel" if create_savedmodel else "tensorflow_graphdef", max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "fp32" if dtype == np.float32 else "int32", "fp32" if dtype == np.float32 else "int32", "fp32" if dtype == np.float32 else "int32", @@ -312,7 +314,7 @@ def create_tf_modelconfig( def create_plan_shape_tensor_modelfile( - models_dir, model_version, max_batch, dtype, shape + models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype ): # Note that resize layer does not support int tensors. # The model takes three inputs (INPUT, DUMMY_INPUT and SHAPE_INPUT) @@ -325,6 +327,7 @@ def create_plan_shape_tensor_modelfile( # SHAPE_OUTPUT : The shape values of resized output trt_dtype = np_to_trt_dtype(dtype) + trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR TRT_LOGGER = trt.Logger(trt.Logger.INFO) @@ -336,7 +339,7 @@ def create_plan_shape_tensor_modelfile( if max_batch != 0: in0 = network.add_input("INPUT", trt.int32, [-1] + dummy_shape) dummy_in0 = network.add_input("DUMMY_INPUT", trt_dtype, [-1] + dummy_shape) - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [1 + len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [1 + len(shape)]) start0 = network.add_input("START", trt.int32, [-1] + unit_shape) end0 = network.add_input("END", trt.int32, [-1] + unit_shape) ready0 = network.add_input("READY", trt.int32, [-1] + unit_shape) @@ -344,7 +347,7 @@ def create_plan_shape_tensor_modelfile( else: in0 = network.add_input("INPUT", trt.int32, dummy_shape) dummy_in0 = network.add_input("DUMMY_INPUT", trt_dtype, dummy_shape) - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [len(shape)]) start0 = network.add_input("START", trt.int32, unit_shape) end0 = network.add_input("END", trt.int32, unit_shape) ready0 = network.add_input("READY", trt.int32, unit_shape) @@ -453,6 +456,7 @@ def create_plan_shape_tensor_modelfile( model_name = tu.get_dyna_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name model_version_dir = models_dir + "/" + model_name + "/" + str(model_version) try: @@ -713,13 +717,17 @@ def create_plan_models(models_dir, model_version, max_batch, dtype, shape): create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape) -def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): +def create_plan_modelconfig( + models_dir, model_version, max_batch, dtype, shape, 
shape_tensor_input_dtype=None +): if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape): return model_name = tu.get_dyna_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + if shape_tensor_input_dtype: + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name config_dir = models_dir + "/" + model_name if FLAGS.tensorrt_shape_io: @@ -787,7 +795,7 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): input [ {{ name: "SHAPE_INPUT" - data_type: TYPE_INT32 + data_type: {} dims: [ {} ] is_shape_tensor: true }} @@ -822,15 +830,18 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32", "int32", "int32", tu.shape_to_dims_str(shape), np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), + np_to_model_dtype(shape_tensor_input_dtype), shape_tensor_dim, tu.shape_to_dims_str(shape), np_to_model_dtype(dtype), @@ -907,9 +918,11 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", @@ -1097,9 +1110,11 @@ def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape): """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), np_to_model_dtype(dtype), @@ -1237,9 +1252,11 @@ def create_libtorch_modelconfig(models_dir, model_version, max_batch, dtype, sha """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", @@ -1379,9 +1396,11 @@ def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, sha """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", @@ -1399,14 +1418,24 @@ def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, sha cfile.write(config) -def create_shape_tensor_models(models_dir, dtype, 
shape, no_batch=True): +def create_shape_tensor_models( + models_dir, dtype, shape, shape_tensor_input_dtype, no_batch=True +): model_version = 1 - create_plan_modelconfig(models_dir, model_version, 8, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 8, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) if no_batch: - create_plan_modelconfig(models_dir, model_version, 0, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 0, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) def create_models(models_dir, dtype, shape, no_batch=True): @@ -1543,6 +1572,15 @@ def create_models(models_dir, dtype, shape, no_batch=True): [ -1, ], + np.int32, + ) + create_shape_tensor_models( + FLAGS.models_dir, + np.float32, + [ + -1, + ], + np.int64, ) else: # Tests with models that accept fixed-shape input/output tensors diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py index 277c5536ba4..7b7066dbb9f 100755 --- a/qa/common/gen_qa_identity_models.py +++ b/qa/common/gen_qa_identity_models.py @@ -762,7 +762,14 @@ def create_plan_dynamic_rf_modelfile( def create_plan_shape_tensor_modelfile( - models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size + models_dir, + model_version, + io_cnt, + max_batch, + dtype, + shape, + profile_max_size, + shape_tensor_input_dtype, ): # Note that resize layer does not support int tensors. 
# The model takes two inputs (INPUT and DUMMY_INPUT) @@ -785,10 +792,11 @@ def create_plan_shape_tensor_modelfile( dummy_shape = [-1] * shape_with_batchsize trt_dtype = np_to_trt_dtype(dtype) + trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR for io_num in range(io_cnt): in_node = network.add_input( - "INPUT{}".format(io_num), trt.int32, [shape_with_batchsize] + "INPUT{}".format(io_num), trt_shape_dtype, [shape_with_batchsize] ) in_node.allowed_formats = 1 << int(trt_memory_format) dummy_in_node = network.add_input( @@ -864,6 +872,7 @@ def create_plan_shape_tensor_modelfile( model_name = tu.get_zero_model_name( "plan_nobatch" if max_batch == 0 else "plan", io_cnt, dtype ) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name model_version_dir = os.path.join(models_dir, model_name, str(model_version)) os.makedirs(model_version_dir, exist_ok=True) @@ -941,7 +950,14 @@ def create_plan_dynamic_modelfile( def create_plan_modelconfig( - create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape + create_savedmodel, + models_dir, + model_version, + io_cnt, + max_batch, + dtype, + shape, + shape_tensor_input_dtype=None, ): if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape): return @@ -954,6 +970,8 @@ def create_plan_modelconfig( if FLAGS.tensorrt_compat: model_name_base += "_compatible" model_name = tu.get_zero_model_name(model_name_base, io_cnt, dtype) + if shape_tensor_input_dtype: + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name config_dir = os.path.join(models_dir, model_name) if FLAGS.tensorrt_shape_io: @@ -976,7 +994,7 @@ def create_plan_modelconfig( }}, {{ name: "INPUT{}" - data_type: TYPE_INT32 + data_type: {} dims: [ {} ] is_shape_tensor: true }} @@ -999,6 +1017,7 @@ def create_plan_modelconfig( np_to_model_dtype(dtype), shape_str, io_num, + np_to_model_dtype(shape_tensor_input_dtype), shape_tensor_dim, io_num, np_to_model_dtype(dtype), @@ -1047,19 +1066,44 @@ def create_plan_modelconfig( cfile.write(config) -def create_shape_tensor_models(models_dir, dtype, shape, io_cnt=1, no_batch=True): +def create_shape_tensor_models( + models_dir, dtype, shape, shape_tensor_input_dtype, io_cnt=1, no_batch=True +): model_version = 1 - create_plan_modelconfig(True, models_dir, model_version, io_cnt, 8, dtype, shape) + create_plan_modelconfig( + True, + models_dir, + model_version, + io_cnt, + 8, + dtype, + shape, + shape_tensor_input_dtype, + ) create_plan_shape_tensor_modelfile( - models_dir, model_version, io_cnt, 8, dtype, shape, 32 + models_dir, model_version, io_cnt, 8, dtype, shape, 32, shape_tensor_input_dtype ) if no_batch: create_plan_modelconfig( - True, models_dir, model_version, io_cnt, 0, dtype, shape + True, + models_dir, + model_version, + io_cnt, + 0, + dtype, + shape, + shape_tensor_input_dtype, ) create_plan_shape_tensor_modelfile( - models_dir, model_version, io_cnt, 0, dtype, shape, 32 + models_dir, + model_version, + io_cnt, + 0, + dtype, + shape, + 32, + shape_tensor_input_dtype, ) @@ -1286,7 +1330,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True): elif FLAGS.tensorrt_compat: create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1, no_batch=False) elif FLAGS.tensorrt_shape_io: - create_shape_tensor_models(FLAGS.models_dir, np.float32, [-1, -1], io_cnt=1) + create_shape_tensor_models( + FLAGS.models_dir, np.float32, [-1, -1], np.int32, io_cnt=1 + ) + create_shape_tensor_models( + FLAGS.models_dir, np.float32, 
[-1, -1], np.int64, io_cnt=1 + ) else: create_models(FLAGS.models_dir, bool, [-1], io_cnt=1) create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1) diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py index 9bf63518f13..42557c7747b 100755 --- a/qa/common/gen_qa_sequence_models.py +++ b/qa/common/gen_qa_sequence_models.py @@ -296,7 +296,7 @@ def create_tf_modelconfig( def create_plan_shape_tensor_modelfile( - models_dir, model_version, max_batch, dtype, shape + models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype ): # Note that resize layer does not support int tensors. # The model takes two inputs (INPUT and SHAPE_INPUT) @@ -308,6 +308,7 @@ def create_plan_shape_tensor_modelfile( # SHAPE_OUTPUT : The shape values of resized output trt_dtype = np_to_trt_dtype(dtype) + trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR TRT_LOGGER = trt.Logger(trt.Logger.INFO) @@ -316,12 +317,12 @@ def create_plan_shape_tensor_modelfile( unit_shape = [1] * len(shape) if max_batch != 0: - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [1 + len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [1 + len(shape)]) in0 = network.add_input("INPUT", trt_dtype, [-1] + shape) start0 = network.add_input("START", trt_dtype, [-1] + unit_shape) ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape) else: - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [len(shape)]) in0 = network.add_input("INPUT", trt_dtype, shape) start0 = network.add_input("START", trt_dtype, unit_shape) ready0 = network.add_input("READY", trt_dtype, unit_shape) @@ -416,6 +417,7 @@ def create_plan_shape_tensor_modelfile( model_name = tu.get_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name model_version_dir = models_dir + "/" + model_name + "/" + str(model_version) try: @@ -637,13 +639,18 @@ def create_plan_models(models_dir, model_version, max_batch, dtype, shape): create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape) -def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): +def create_plan_modelconfig( + models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype=None +): if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape): return model_name = tu.get_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + if shape_tensor_input_dtype: + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + config_dir = models_dir + "/" + model_name if FLAGS.tensorrt_shape_io: shape_tensor_dim = len(shape) @@ -684,7 +691,7 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): input [ {{ name: "SHAPE_INPUT" - data_type: TYPE_INT32 + data_type: {} dims: [ {} ] is_shape_tensor: true }} @@ -723,6 +730,7 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): "int32" if dtype == np.int32 else "fp32", np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), + np_to_model_dtype(shape_tensor_input_dtype), shape_tensor_dim, np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), @@ -1234,14 +1242,24 @@ def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, sha cfile.write(config) -def create_shape_tensor_models(models_dir, dtype, shape, 
no_batch=True): +def create_shape_tensor_models( + models_dir, dtype, shape, shape_tensor_input_dtype, no_batch=True +): model_version = 1 - create_plan_modelconfig(models_dir, model_version, 8, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 8, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) if no_batch: - create_plan_modelconfig(models_dir, model_version, 0, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 0, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) def create_models(models_dir, dtype, shape, no_batch=True): @@ -1415,6 +1433,15 @@ def create_models(models_dir, dtype, shape, no_batch=True): [ -1, ], + np.int32, + ) + create_shape_tensor_models( + FLAGS.models_dir, + np.float32, + [ + -1, + ], + np.int64, ) else: # Tests with models that accept fixed-shape input/output tensors diff --git a/qa/common/infer_util.py b/qa/common/infer_util.py index f724a90cad9..4e19b5733c8 100755 --- a/qa/common/infer_util.py +++ b/qa/common/infer_util.py @@ -729,6 +729,7 @@ def infer_shape_tensor( priority=0, timeout_us=0, batch_size=1, + shape_tensor_input_dtype=np.int32, ): # Lazy shm imports... if use_system_shared_memory: @@ -784,7 +785,7 @@ def infer_shape_tensor( dummy_input_list.append(dummy_in0) # Prepare shape input tensor - in0 = np.asarray(input_shape_values[io_num], dtype=np.int32) + in0 = np.asarray(input_shape_values[io_num], dtype=shape_tensor_input_dtype) input_list.append(in0) # Prepare the expected value for the output. Skip dummy output as we @@ -792,12 +793,14 @@ def infer_shape_tensor( expected_dict[output_name] = np.ndarray.copy(in0) # Only need to create region once - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors - # hence there is a multiple of 2 to compute the byte size - # properly. - input_byte_size = in0.size * np.dtype(np.int32).itemsize - output_byte_size = input_byte_size * batch_size * 2 + input_byte_size = in0.size * np.dtype(shape_tensor_input_dtype).itemsize + output_byte_size = input_byte_size * batch_size + if shape_tensor_input_dtype == np.int32: + # Currently in our test cases we are + # using int64 outputs for shape tensors + # hence there is a multiple of 2 to compute the byte size + # properly. 
+ output_byte_size = output_byte_size * 2 if use_system_shared_memory: input_shm_handle_list.append( ( @@ -827,6 +830,7 @@ def infer_shape_tensor( ) model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name # Run inference and check results for each config for config in configs: client_utils = grpcclient if config[1] == "grpc" else httpclient @@ -850,7 +854,11 @@ def infer_shape_tensor( ) ) inputs.append( - client_utils.InferInput(input_name, input_list[io_num].shape, "INT32") + client_utils.InferInput( + input_name, + input_list[io_num].shape, + np_to_triton_dtype(shape_tensor_input_dtype), + ) ) outputs.append(client_utils.InferRequestedOutput(dummy_output_name)) outputs.append(client_utils.InferRequestedOutput(output_name)) @@ -896,13 +904,30 @@ def infer_shape_tensor( if error is not None: raise error else: - results = triton_client.infer( - model_name, - inputs, - outputs=outputs, - priority=priority, - timeout=timeout_us, - ) + try: + results = triton_client.infer( + model_name, + inputs, + outputs=outputs, + priority=priority, + timeout=timeout_us, + ) + except Exception as e: + if use_system_shared_memory: + for io_num in range(io_cnt): + shm.destroy_shared_memory_region( + input_shm_handle_list[io_num][0] + ) + triton_client.unregister_system_shared_memory( + f"INPUT{io_num}" + shm_suffix + ) + shm.destroy_shared_memory_region( + output_shm_handle_list[io_num][0] + ) + triton_client.unregister_system_shared_memory( + f"OUTPUT{io_num}" + shm_suffix + ) + raise e for io_num in range(io_cnt): output_name = "OUTPUT{}".format(io_num) @@ -919,8 +944,8 @@ def infer_shape_tensor( output_shape = output.shape else: output_shape = output["shape"] - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors + # Currently in our test cases we are + # using int64 outputs for shape tensors # hence passing int64 as datatype. out = shm.get_contents_as_numpy( output_shm_handle_list[io_num][0], np.int64, output_shape diff --git a/qa/common/sequence_util.py b/qa/common/sequence_util.py index b331a7572f1..1b2560538d8 100755 --- a/qa/common/sequence_util.py +++ b/qa/common/sequence_util.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -195,7 +195,13 @@ def precreate_register_regions( # Returns (name, byte size, shm_handle) def precreate_register_shape_tensor_regions( - self, value_list, dtype, i, batch_size=1, tensor_shape=(1,) + self, + value_list, + dtype, + i, + batch_size=1, + tensor_shape=(1,), + shape_tensor_input_dtype=np.int32, ): self.assertFalse( _test_cuda_shared_memory, @@ -220,7 +226,7 @@ def precreate_register_shape_tensor_regions( # Only one shape tensor input per batch shape_input_list.append( - np.full(tensor_shape, shape_value, dtype=np.int32) + np.full(tensor_shape, shape_value, dtype=shape_tensor_input_dtype) ) if dtype == np.object_: @@ -233,11 +239,13 @@ def precreate_register_shape_tensor_regions( input_byte_size = sum([i0.nbytes for i0 in input_list_tmp]) shape_input_byte_size = sum([i0.nbytes for i0 in shape_input_list]) - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors - # hence there is a multiple of 2 to compute the byte size - # properly. - shape_output_byte_size = shape_input_byte_size * 2 + shape_output_byte_size = shape_input_byte_size + if shape_tensor_input_dtype == np.int32: + # Currently in our test cases we are + # using int64 outputs for shape tensors + # hence there is a multiple of 2 to compute the byte size + # properly. + shape_output_byte_size = shape_output_byte_size * 2 output_byte_size = np.dtype(dtype).itemsize + 2 resized_output_byte_size = 32 * shape_value @@ -298,7 +306,13 @@ def precreate_register_shape_tensor_regions( # Returns (name, byte size, shm_handle) def precreate_register_dynaseq_shape_tensor_regions( - self, value_list, dtype, i, batch_size=1, tensor_shape=(1,) + self, + value_list, + dtype, + i, + batch_size=1, + tensor_shape=(1,), + shape_tensor_input_dtype=np.int32, ): self.assertFalse( _test_cuda_shared_memory, @@ -326,7 +340,7 @@ def precreate_register_dynaseq_shape_tensor_regions( # Only one shape tensor input per batch shape_input_list.append( - np.full(tensor_shape, shape_value, dtype=np.int32) + np.full(tensor_shape, shape_value, dtype=shape_tensor_input_dtype) ) if dtype == np.object_: @@ -341,11 +355,13 @@ def precreate_register_dynaseq_shape_tensor_regions( dummy_input_byte_size = sum([i0.nbytes for i0 in dummy_input_list]) shape_input_byte_size = sum([i0.nbytes for i0 in shape_input_list]) - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors - # hence there is a multiple of 2 to compute the byte size - # properly. - shape_output_byte_size = shape_input_byte_size * 2 + shape_output_byte_size = shape_input_byte_size + if shape_tensor_input_dtype == np.int32: + # Currently in our test cases we are + # using int64 outputs for shape tensors + # hence there is a multiple of 2 to compute the byte size + # properly. + shape_output_byte_size = shape_output_byte_size * 2 output_byte_size = np.dtype(np.int32).itemsize + 2 resized_output_byte_size = 32 * shape_value @@ -894,6 +910,7 @@ def check_sequence_shape_tensor_io( shm_region_handles, using_dynamic_batcher=False, sequence_name="", + shape_tensor_input_dtype=np.int32, ): """Perform sequence of inferences using async run. 
The 'values' holds a list of tuples, one for each inference with format: @@ -943,7 +960,9 @@ def check_sequence_shape_tensor_io( ) inputs.append( client_utils.InferInput( - "SHAPE_INPUT", shape_tensor_shape, np_to_triton_dtype(np.int32) + "SHAPE_INPUT", + shape_tensor_shape, + np_to_triton_dtype(shape_tensor_input_dtype), ) ) if using_dynamic_batcher: @@ -959,7 +978,9 @@ def check_sequence_shape_tensor_io( # Set IO values shape_values.append( - np.full(shape_tensor_shape, shape_value, dtype=np.int32) + np.full( + shape_tensor_shape, shape_value, dtype=shape_tensor_input_dtype + ) ) if not _test_system_shared_memory: if using_dynamic_batcher:
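
The hunks above thread the new shape_tensor_input_dtype argument through the QA model generators and the client-side test utilities: every shape-tensor PLAN model is now generated in both an int32 and an int64 variant, the variant is appended to the model name, and the shared-memory byte-size math accounts for shape-tensor outputs being read back as int64. A minimal Python sketch of those two conventions follows; the base name "plan_zero_1_float32", the example shape values, and the batch size are assumptions chosen for illustration (the generators derive the real name via tu.get_zero_model_name), not part of the patch.

import numpy as np

# Sketch, assuming the base name produced by tu.get_zero_model_name(...) is
# "plan_zero_1_float32". The shape-tensor input dtype is appended so each
# model exists as an int32 and an int64 variant.
base_name = "plan_zero_1_float32"
for shape_tensor_input_dtype in (np.int32, np.int64):
    model_name = base_name + "_" + np.dtype(shape_tensor_input_dtype).name
    # -> "plan_zero_1_float32_int32" and "plan_zero_1_float32_int64"

    # Shape-tensor outputs are read back as int64, so when the shape input is
    # int32 the output shared-memory region must be twice the input byte size
    # (4-byte elements in, 8-byte elements out).
    in0 = np.asarray([4, 4], dtype=shape_tensor_input_dtype)  # assumed shape values
    batch_size = 8                                            # assumed batch size
    input_byte_size = in0.size * np.dtype(shape_tensor_input_dtype).itemsize
    output_byte_size = input_byte_size * batch_size
    if shape_tensor_input_dtype == np.int32:
        output_byte_size *= 2
    print(model_name, input_byte_size, output_byte_size)

The same doubling rule appears in infer_util.py and sequence_util.py wherever an int32 shape input is paired with an int64 output region, while the int64 variants use the input byte size directly.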