From f43cedd2b4fb8859282a5cabf5be83f4d61c4f96 Mon Sep 17 00:00:00 2001 From: Kris Hung Date: Tue, 18 Jun 2024 14:24:26 -0700 Subject: [PATCH 1/2] fix: Fix version for setuptools and grpcio-tools. Remove cudnn 8 installation (#7331) --- build.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/build.py b/build.py index 5f7200d2b6c..d2d2362e1f4 100755 --- a/build.py +++ b/build.py @@ -1082,9 +1082,9 @@ def create_dockerfile_linux( """ if "tensorrtllm" in backends: df += """ -# Remove TRT contents that are not needed in runtime -RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig +RUN ldconfig +# Remove contents that are not needed in runtime RUN ARCH="$(uname -i)" \\ && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\ && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\ @@ -1094,10 +1094,14 @@ def create_dockerfile_linux( RUN python3 -m pip install --upgrade pip \\ && pip3 install transformers -# Install TensorRT-LLM +# ldconfig for TRT-LLM RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf +# Setuptools has breaking changes in version 70.0.0, so fix it to 69.5.1 +# The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0 +RUN pip3 install setuptools==69.5.1 grpcio-tools==1.64.0 + ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH """ with open(os.path.join(ddir, dockerfile_name), "w") as dfile: From d5eb4676d2857f1fdc5dfd44b3f0384c11bbe184 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Thu, 20 Jun 2024 19:30:06 +0530 Subject: [PATCH 2/2] ci: Add INT64 Datatype Support for Shape Tensors in TensorRT Backend (#7329) --- qa/L0_model_config/test.sh | 2 +- qa/L0_perf_analyzer/test.sh | 4 +- qa/L0_perf_analyzer_capi/test.sh | 6 +- qa/L0_trt_shape_tensors/test.sh | 21 +- .../trt_shape_tensor_test.py | 1377 +++++++++-------- qa/common/gen_common.py | 4 +- qa/common/gen_qa_dyna_sequence_models.py | 94 +- qa/common/gen_qa_identity_models.py | 69 +- qa/common/gen_qa_sequence_models.py | 47 +- qa/common/infer_util.py | 59 +- qa/common/sequence_util.py | 55 +- 11 files changed, 1019 insertions(+), 719 deletions(-) diff --git a/qa/L0_model_config/test.sh b/qa/L0_model_config/test.sh index e36cbaedb3a..9220c4eafcf 100755 --- a/qa/L0_model_config/test.sh +++ b/qa/L0_model_config/test.sh @@ -88,7 +88,7 @@ for modelpath in \ autofill_noplatform/tensorrt/mixed_batch_hint_shape_values/1 \ autofill_noplatform_success/tensorrt/no_config_shape_tensor/1 ; do mkdir -p $modelpath - cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32/1/model.plan \ + cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32/1/model.plan \ $modelpath/. 
done diff --git a/qa/L0_perf_analyzer/test.sh b/qa/L0_perf_analyzer/test.sh index f2807824b97..49c7e72e481 100755 --- a/qa/L0_perf_analyzer/test.sh +++ b/qa/L0_perf_analyzer/test.sh @@ -93,7 +93,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/ # Copy shape tensor models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/ +cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/ # Copying ensemble including a sequential model cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR @@ -564,7 +564,7 @@ for PROTOCOL in grpc http; do # Shape tensor I/O model (server needs the shape tensor on the CPU) for SHARED_MEMORY_TYPE in none system; do set +e - $PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \ + $PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \ --shape DUMMY_INPUT0:4,4 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -b 8 -s ${STABILITY_THRESHOLD} \ >$CLIENT_LOG 2>&1 if [ $? -ne 0 ]; then diff --git a/qa/L0_perf_analyzer_capi/test.sh b/qa/L0_perf_analyzer_capi/test.sh index f9fa3c078e2..d031e2cacf4 100755 --- a/qa/L0_perf_analyzer_capi/test.sh +++ b/qa/L0_perf_analyzer_capi/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -73,7 +73,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/ # Copy shape tensor models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/ +cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/ # Copying ensemble including a sequential model cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR @@ -201,7 +201,7 @@ if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then fi # Shape tensor I/O model (server needs the shape tensor on the CPU) -$PERF_ANALYZER -v -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \ +$PERF_ANALYZER -v -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \ --shape DUMMY_INPUT0:4,4 -p2000 -b 8 \ --service-kind=triton_c_api --model-repository=$DATADIR \ --triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ diff --git a/qa/L0_trt_shape_tensors/test.sh b/qa/L0_trt_shape_tensors/test.sh index eed67d9dcb5..f08ed339b05 100755 --- a/qa/L0_trt_shape_tensors/test.sh +++ b/qa/L0_trt_shape_tensors/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -127,10 +127,13 @@ else fi # Prepare the config file for dynamic batching tests -CONFIG_FILE="models/plan_zero_1_float32/config.pbtxt" -sed -i "s/^max_batch_size:.*/max_batch_size: 8/" $CONFIG_FILE && \ -sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" $CONFIG_FILE && \ - echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> $CONFIG_FILE +for dtype in int32 int64; do + CONFIG_FILE="models/plan_zero_1_float32_${dtype}/config.pbtxt" + sed -i "s/^max_batch_size:.*/max_batch_size: 8/" "$CONFIG_FILE" + sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" "$CONFIG_FILE" + echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >>"$CONFIG_FILE" +done + for i in \ test_dynamic_different_shape_values \ test_dynamic_identical_shape_values; do @@ -202,9 +205,11 @@ for i in \ done # Prepare the config file for dynamic sequence batching tests -CONFIG_FILE="models/plan_dyna_sequence_float32/config.pbtxt" -sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" $CONFIG_FILE && \ -sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" $CONFIG_FILE +for dtype in int32 int64; do + CONFIG_FILE="models/plan_dyna_sequence_float32_${dtype}/config.pbtxt" + sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" "$CONFIG_FILE" + sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" "$CONFIG_FILE" +done export NO_BATCHING=0 diff --git a/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py b/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py index d3563dce9e1..551ee2f8c0d 100755 --- a/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py +++ b/qa/L0_trt_shape_tensors/trt_shape_tensor_test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -84,6 +84,7 @@ def check_response( shm_region_names=None, precreated_shm_regions=None, shm_suffix="", + shape_tensor_input_dtype=np.int32, ): try: # Add batch size to shape as full shape is expected @@ -104,6 +105,7 @@ def check_response( shm_suffix=shm_suffix, use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, batch_size=bs, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) end_ms = int(round(time.time() * 1000)) @@ -222,80 +224,90 @@ def check_status(self, model_name, batch_exec, exec_cnt, infer_cnt): ) def test_static_batch(self): - iu.infer_shape_tensor( - self, - "plan", - np.float32, - [[32, 32]], - [[8, 4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - batch_size=8, - ) - iu.infer_shape_tensor( - self, - "plan", - np.float32, - [[4, 4]], - [[8, 32, 32]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - batch_size=8, - ) - iu.infer_shape_tensor( - self, - "plan", - np.float32, - [[4, 4]], - [[8, 4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - batch_size=8, - ) - - def test_nobatch(self): - iu.infer_shape_tensor( - self, - "plan_nobatch", - np.float32, - [[32, 32]], - [[4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - ) - iu.infer_shape_tensor( - self, - "plan_nobatch", - np.float32, - [[4, 4]], - [[32, 32]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - ) - iu.infer_shape_tensor( - self, - "plan_nobatch", - np.float32, - [[4, 4]], - [[4, 4]], - use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, - ) - - def test_wrong_shape_values(self): - over_shape_values = [[32, 33]] - try: + for shape_tensor_input_dtype in [np.int32, np.int64]: iu.infer_shape_tensor( self, "plan", np.float32, - over_shape_values, + [[32, 32]], [[8, 4, 4]], use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) - # InferenceServerException will be raised from different namespace, - # use dynamic type characteristic to catch both ex - except Exception as ex: - self.assertTrue( - "The shape value at index 2 is expected to be in range from 1 to 32, Got: 33" - in ex.message() + iu.infer_shape_tensor( + self, + "plan", + np.float32, + [[4, 4]], + [[8, 32, 32]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + iu.infer_shape_tensor( + self, + "plan", + np.float32, + [[4, 4]], + [[8, 4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + + def test_nobatch(self): + for shape_tensor_input_dtype in [np.int32, np.int64]: + iu.infer_shape_tensor( + self, + "plan_nobatch", + np.float32, + [[32, 32]], + [[4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + iu.infer_shape_tensor( + self, + "plan_nobatch", + np.float32, + [[4, 4]], + [[32, 32]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + iu.infer_shape_tensor( + self, + "plan_nobatch", + np.float32, + [[4, 4]], + [[4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) + def test_wrong_shape_values(self): + over_shape_values = [[32, 33]] + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + iu.infer_shape_tensor( + self, + "plan", + 
np.float32, + over_shape_values, + [[8, 4, 4]], + use_system_shared_memory=TEST_SYSTEM_SHARED_MEMORY, + batch_size=8, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + # InferenceServerException will be raised from different namespace, + # use dynamic type characteristic to catch both ex + except Exception as ex: + self.assertIn( + "The shape value at index 2 is expected to be in range from 1 to 32, Got: 33", + ex.message(), + ) + # Dynamic Batcher tests def test_dynamic_different_shape_values(self): # Send two requests with sum of static batch sizes == @@ -303,86 +315,96 @@ def test_dynamic_different_shape_values(self): # should cause the requests to not be batched. The first # response will come back immediately and the second # delayed by the max batch queue delay - try: - model_name = tu.get_zero_model_name("plan", 1, np.float32) - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - - threads = [] - threads.append( - threading.Thread( - target=self.check_response, - args=(3, (6000, None)), - kwargs={ - "shape_values": [[2, 2]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + model_name = tu.get_zero_model_name("plan", 1, np.float32) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + + threads = [] + threads.append( + threading.Thread( + target=self.check_response, + args=(3, (6000, None)), + kwargs={ + "shape_values": [[2, 2]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_response, - args=(3, (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)), - kwargs={ - "shape_values": [[4, 4]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + threads.append( + threading.Thread( + target=self.check_response, + args=(3, (_max_queue_delay_ms * 1.5, _max_queue_delay_ms)), + kwargs={ + "shape_values": [[4, 4]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads[0].start() - time.sleep(1) - threads[1].start() - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {3: 2}, 2, 6) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) + threads[0].start() + time.sleep(1) + threads[1].start() + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {3: 2}, 2, 6) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) def test_dynamic_identical_shape_values(self): # Send two requests with sum of static batch sizes == # preferred size, but with identical shape values. This # should cause the requests to get batched. Both # responses should come back immediately. 
- try: - model_name = tu.get_zero_model_name("plan", 1, np.float32) - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - - threads = [] - threads.append( - threading.Thread( - target=self.check_response, - args=(4, (6000, None)), - kwargs={ - "shape_values": [[4, 4]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + model_name = tu.get_zero_model_name("plan", 1, np.float32) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + + threads = [] + threads.append( + threading.Thread( + target=self.check_response, + args=(4, (6000, None)), + kwargs={ + "shape_values": [[4, 4]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_response, - args=(2, (6000, None)), - kwargs={ - "shape_values": [[4, 4]], - "dummy_input_shapes": [[16, 16]], - "shm_suffix": "{}".format(len(threads)), - }, + threads.append( + threading.Thread( + target=self.check_response, + args=(2, (6000, None)), + kwargs={ + "shape_values": [[4, 4]], + "dummy_input_shapes": [[16, 16]], + "shm_suffix": "{}".format(len(threads)), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads[0].start() - time.sleep(1) - threads[1].start() - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {6: 1}, 1, 6) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) + threads[0].start() + time.sleep(1) + threads[1].start() + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {6: 1}, 1, 6) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) class SequenceBatcherShapeTensorTest(su.SequenceBatcherTestUtil): @@ -401,124 +423,152 @@ def test_sequence_identical_shape_values(self): # inferences. self.clear_deferred_exceptions() dtype = np.float32 - try: - model_name = tu.get_sequence_model_name("plan", dtype) - self.check_setup(model_name) - - # Need scheduler to wait for queue to contain all - # inferences for both sequences. 
- self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) - self.assertTrue("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0) - precreated_shm0_handles = self.precreate_register_shape_tensor_regions( - ((2, 1), (4, 2), (8, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_shape_tensor_regions( - ((2, 11), (4, 12), (8, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_shape_tensor_regions( - ((2, 111), (4, 112), (8, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_shape_tensor_regions( - ((2, 1111), (4, 1112), (8, 1113)), dtype, 3 - ) - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1001, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1, None), - (None, 4, 2, None), - ("end", 8, 3, None), + for shape_tensor_input_dtype in [np.int32, np.int64]: + try: + model_name = tu.get_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + + # Need scheduler to wait for queue to contain all + # inferences for both sequences. + self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) + self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + self.assertEqual( + int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0 + ) + precreated_shm0_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 1), (4, 2), (8, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm1_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 11), (4, 12), (8, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm2_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 111), (4, 112), (8, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm3_handles = self.precreate_register_shape_tensor_regions( + value_list=((2, 1111), (4, 1112), (8, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1001, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1, None), + (None, 4, 2, None), + ("end", 8, 3, None), + ), + self.get_expected_result(6, 3, "end"), + precreated_shm0_handles, ), - self.get_expected_result(6, 3, "end"), - precreated_shm0_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1002, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 11, None), - (None, 4, 12, None), - ("end", 8, 13, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1002, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 11, None), + (None, 4, 12, 
None), + ("end", 8, 13, None), + ), + self.get_expected_result(36, 13, "end"), + precreated_shm1_handles, ), - self.get_expected_result(36, 13, "end"), - precreated_shm1_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1003, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 111, None), - (None, 4, 112, None), - ("end", 8, 113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1003, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 111, None), + (None, 4, 112, None), + ("end", 8, 113, None), + ), + self.get_expected_result(336, 113, "end"), + precreated_shm2_handles, ), - self.get_expected_result(336, 113, "end"), - precreated_shm2_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1004, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1111, None), - (None, 4, 1112, None), - ("end", 8, 1113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1004, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1111, None), + (None, 4, 1112, None), + ("end", 8, 1113, None), + ), + self.get_expected_result(3336, 1113, "end"), + precreated_shm3_handles, ), - self.get_expected_result(3336, 1113, "end"), - precreated_shm3_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {4: 3}, 3, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + for t in threads: + t.start() + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {4: 3}, 3, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) def test_sequence_different_shape_values(self): # Test model instances together are configured with @@ -530,127 +580,155 @@ def test_sequence_different_shape_values(self): self.clear_deferred_exceptions() dtype = np.float32 - precreated_shm0_handles = self.precreate_register_shape_tensor_regions( - ((1, 1), (1, 2), (1, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_shape_tensor_regions( 
- ((32, 11), (32, 12), (32, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_shape_tensor_regions( - ((16, 111), (16, 112), (16, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_shape_tensor_regions( - ((1, 1111), (1, 1112), (1, 1113)), dtype, 3 - ) - try: - model_name = tu.get_sequence_model_name("plan", dtype) - self.check_setup(model_name) - - # Need scheduler to wait for queue to contain all - # inferences for both sequences. - self.assertTrue("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) - self.assertTrue("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - self.assertEqual(int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0) - - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1001, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 1, 1, None), - (None, 1, 2, None), - ("end", 1, 3, None), + for shape_tensor_input_dtype in [np.int32, np.int64]: + precreated_shm0_handles = self.precreate_register_shape_tensor_regions( + value_list=((1, 1), (1, 2), (1, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm1_handles = self.precreate_register_shape_tensor_regions( + value_list=((32, 11), (32, 12), (32, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm2_handles = self.precreate_register_shape_tensor_regions( + value_list=((16, 111), (16, 112), (16, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + precreated_shm3_handles = self.precreate_register_shape_tensor_regions( + value_list=((1, 1111), (1, 1112), (1, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + try: + model_name = tu.get_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + + # Need scheduler to wait for queue to contain all + # inferences for both sequences. 
+ self.assertIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertEqual(int(os.environ["TRITONSERVER_DELAY_SCHEDULER"]), 12) + self.assertIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + self.assertEqual( + int(os.environ["TRITONSERVER_BACKLOG_DELAY_SCHEDULER"]), 0 + ) + + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1001, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 1, 1, None), + (None, 1, 2, None), + ("end", 1, 3, None), + ), + self.get_expected_result(6, 3, "end"), + precreated_shm0_handles, ), - self.get_expected_result(6, 3, "end"), - precreated_shm0_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1002, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 32, 11, None), - (None, 32, 12, None), - ("end", 32, 13, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1002, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 32, 11, None), + (None, 32, 12, None), + ("end", 32, 13, None), + ), + self.get_expected_result(36, 13, "end"), + precreated_shm1_handles, ), - self.get_expected_result(36, 13, "end"), - precreated_shm1_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1003, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 16, 111, None), - (None, 16, 112, None), - ("end", 16, 113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1003, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 16, 111, None), + (None, 16, 112, None), + ("end", 16, 113, None), + ), + self.get_expected_result(336, 113, "end"), + precreated_shm2_handles, ), - self.get_expected_result(336, 113, "end"), - precreated_shm2_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - 1004, - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 1, 1111, None), - (None, 1, 1112, None), - ("end", 1, 1113, None), + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + 1004, + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 1, 1111, None), + (None, 1, 1112, None), + ("end", 1, 1113, None), + ), + self.get_expected_result(3336, 1113, "end"), + precreated_shm3_handles, ), - self.get_expected_result(3336, 1113, "end"), - precreated_shm3_handles, - ), - kwargs={"sequence_name": "{}".format(self._testMethodName)}, + kwargs={ + "sequence_name": "{}".format(self._testMethodName), + 
"shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - time.sleep(1) - for t in threads: - t.join() + for t in threads: + t.start() + time.sleep(1) + for t in threads: + t.join() - self.check_deferred_exception() - self.check_status(model_name, {4: 3, 3: 6}, 9, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + self.check_deferred_exception() + self.check_status(model_name, {4: 3, 3: 6}, 9, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) class DynaSequenceBatcherTest(su.SequenceBatcherTestUtil): @@ -667,300 +745,355 @@ def _multi_sequence_different_shape_impl(self, sleep_secs): self.clear_deferred_exceptions() dtype = np.float32 - precreated_shm0_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((1, 1), (12, 2), (2, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((3, 11), (4, 12), (5, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((6, 111), (7, 112), (8, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((9, 1111), (10, 1112), (11, 1113)), dtype, 3 - ) - - try: - model_name = tu.get_dyna_sequence_model_name("plan", dtype) - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertFalse("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - - corrids = [1001, 1002, 1003, 1004] - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[0], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 1, 1, None), - (None, 12, 2, None), - ("end", 2, 3, None), - ), - self.get_expected_result(4 + corrids[0], corrids[0], 3, "end"), - precreated_shm0_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[0] - ), - "using_dynamic_batcher": True, - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + precreated_shm0_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((1, 1), (12, 2), (2, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[1], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 3, 11, None), - (None, 4, 12, None), - ("end", 5, 13, None), - ), - self.get_expected_result( - 36 + corrids[1], corrids[1], 13, "end" - ), - precreated_shm1_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[1] - ), - "using_dynamic_batcher": True, - }, + precreated_shm1_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((3, 11), (4, 12), (5, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - 
threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[2], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 6, 111, None), - (None, 7, 112, None), - ("end", 8, 113, None), - ), - self.get_expected_result( - 336 + corrids[2], corrids[2], 113, "end" - ), - precreated_shm2_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[2] - ), - "using_dynamic_batcher": True, - }, + precreated_shm2_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((6, 111), (7, 112), (8, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + ) + precreated_shm3_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((9, 1111), (10, 1112), (11, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[3], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 9, 1111, None), - (None, 10, 1112, None), - ("end", 11, 1113, None), + + try: + model_name = tu.get_dyna_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + + corrids = [1001, 1002, 1003, 1004] + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[0], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 1, 1, None), + (None, 12, 2, None), + ("end", 2, 3, None), + ), + self.get_expected_result( + 4 + corrids[0], corrids[0], 3, "end" + ), + precreated_shm0_handles, ), - self.get_expected_result( - 3336 + corrids[3], corrids[3], 1113, "end" + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[0] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[1], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 3, 11, None), + (None, 4, 12, None), + ("end", 5, 13, None), + ), + self.get_expected_result( + 36 + corrids[1], corrids[1], 13, "end" + ), + precreated_shm1_handles, ), - precreated_shm3_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[3] + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[1] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[2], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 6, 111, None), + (None, 7, 112, None), + ("end", 8, 113, None), + ), + self.get_expected_result( + 336 + corrids[2], corrids[2], 113, "end" + ), + precreated_shm2_handles, ), - "using_dynamic_batcher": True, - }, + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[2] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + 
target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[3], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 9, 1111, None), + (None, 10, 1112, None), + ("end", 11, 1113, None), + ), + self.get_expected_result( + 3336 + corrids[3], corrids[3], 1113, "end" + ), + precreated_shm3_handles, + ), + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[3] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - if sleep_secs > 0: - time.sleep(sleep_secs) - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {1: 12}, 12, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + for t in threads: + t.start() + if sleep_secs > 0: + time.sleep(sleep_secs) + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {1: 12}, 12, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) def _multi_sequence_identical_shape_impl(self, sleep_secs): self.clear_deferred_exceptions() dtype = np.float32 - precreated_shm0_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 1), (4, 2), (8, 3)), dtype, 0 - ) - precreated_shm1_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 11), (4, 12), (8, 13)), dtype, 1 - ) - precreated_shm2_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 111), (4, 112), (8, 113)), dtype, 2 - ) - precreated_shm3_handles = self.precreate_register_dynaseq_shape_tensor_regions( - ((2, 1111), (4, 1112), (8, 1113)), dtype, 3 - ) - - try: - model_name = tu.get_dyna_sequence_model_name("plan", dtype) - - self.check_setup(model_name) - self.assertFalse("TRITONSERVER_DELAY_SCHEDULER" in os.environ) - self.assertFalse("TRITONSERVER_BACKLOG_DELAY_SCHEDULER" in os.environ) - - corrids = [1001, 1002, 1003, 1004] - threads = [] - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[0], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1, None), - (None, 4, 2, None), - ("end", 8, 3, None), - ), - self.get_expected_result(4 + corrids[0], corrids[0], 3, "end"), - precreated_shm0_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[0] - ), - "using_dynamic_batcher": True, - }, + for shape_tensor_input_dtype in [np.int32, np.int64]: + precreated_shm0_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 1), (4, 2), (8, 3)), + dtype=dtype, + i=0, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[1], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 11, None), - (None, 4, 12, None), - ("end", 8, 13, None), - ), - 
self.get_expected_result( - 36 + corrids[1], corrids[1], 13, "end" - ), - precreated_shm1_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[1] - ), - "using_dynamic_batcher": True, - }, + precreated_shm1_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 11), (4, 12), (8, 13)), + dtype=dtype, + i=1, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[2], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 111, None), - (None, 4, 112, None), - ("end", 8, 113, None), - ), - self.get_expected_result( - 336 + corrids[2], corrids[2], 113, "end" - ), - precreated_shm2_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[2] - ), - "using_dynamic_batcher": True, - }, + precreated_shm2_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 111), (4, 112), (8, 113)), + dtype=dtype, + i=2, + shape_tensor_input_dtype=shape_tensor_input_dtype, ) ) - threads.append( - threading.Thread( - target=self.check_sequence_shape_tensor_io, - args=( - model_name, - dtype, - corrids[3], - (None, None), - # (flag_str, shape_value, value, pre_delay_ms) - ( - ("start", 2, 1111, None), - (None, 4, 1112, None), - ("end", 8, 1113, None), + precreated_shm3_handles = ( + self.precreate_register_dynaseq_shape_tensor_regions( + value_list=((2, 1111), (4, 1112), (8, 1113)), + dtype=dtype, + i=3, + shape_tensor_input_dtype=shape_tensor_input_dtype, + ) + ) + + try: + model_name = tu.get_dyna_sequence_model_name("plan", dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + self.check_setup(model_name) + self.assertNotIn("TRITONSERVER_DELAY_SCHEDULER", os.environ) + self.assertNotIn("TRITONSERVER_BACKLOG_DELAY_SCHEDULER", os.environ) + + corrids = [1001, 1002, 1003, 1004] + threads = [] + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[0], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1, None), + (None, 4, 2, None), + ("end", 8, 3, None), + ), + self.get_expected_result( + 4 + corrids[0], corrids[0], 3, "end" + ), + precreated_shm0_handles, ), - self.get_expected_result( - 3336 + corrids[3], corrids[3], 1113, "end" + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[0] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[1], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 11, None), + (None, 4, 12, None), + ("end", 8, 13, None), + ), + self.get_expected_result( + 36 + corrids[1], corrids[1], 13, "end" + ), + precreated_shm1_handles, ), - precreated_shm3_handles, - ), - kwargs={ - "sequence_name": "{}_{}".format( - self._testMethodName, corrids[3] + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[1] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[2], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 111, 
None), + (None, 4, 112, None), + ("end", 8, 113, None), + ), + self.get_expected_result( + 336 + corrids[2], corrids[2], 113, "end" + ), + precreated_shm2_handles, ), - "using_dynamic_batcher": True, - }, + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[2] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) + ) + threads.append( + threading.Thread( + target=self.check_sequence_shape_tensor_io, + args=( + model_name, + dtype, + corrids[3], + (None, None), + # (flag_str, shape_value, value, pre_delay_ms) + ( + ("start", 2, 1111, None), + (None, 4, 1112, None), + ("end", 8, 1113, None), + ), + self.get_expected_result( + 3336 + corrids[3], corrids[3], 1113, "end" + ), + precreated_shm3_handles, + ), + kwargs={ + "sequence_name": "{}_{}".format( + self._testMethodName, corrids[3] + ), + "using_dynamic_batcher": True, + "shape_tensor_input_dtype": shape_tensor_input_dtype, + }, + ) ) - ) - for t in threads: - t.start() - if sleep_secs > 0: - time.sleep(sleep_secs) - for t in threads: - t.join() - self.check_deferred_exception() - self.check_status(model_name, {4: 3}, 3, 12) - except Exception as ex: - self.assertTrue(False, "unexpected error {}".format(ex)) - finally: - if TEST_SYSTEM_SHARED_MEMORY: - self.cleanup_shm_regions(precreated_shm0_handles) - self.cleanup_shm_regions(precreated_shm1_handles) - self.cleanup_shm_regions(precreated_shm2_handles) - self.cleanup_shm_regions(precreated_shm3_handles) + for t in threads: + t.start() + if sleep_secs > 0: + time.sleep(sleep_secs) + for t in threads: + t.join() + self.check_deferred_exception() + self.check_status(model_name, {4: 3}, 3, 12) + except Exception as ex: + self.assertTrue(False, "unexpected error {}".format(ex)) + finally: + if TEST_SYSTEM_SHARED_MEMORY: + self.cleanup_shm_regions(precreated_shm0_handles) + self.cleanup_shm_regions(precreated_shm1_handles) + self.cleanup_shm_regions(precreated_shm2_handles) + self.cleanup_shm_regions(precreated_shm3_handles) def test_dynaseq_identical_shape_values_series(self): # Send four sequences with identical shape values in series diff --git a/qa/common/gen_common.py b/qa/common/gen_common.py index 5bb751f3c8a..417ad9477ae 100644 --- a/qa/common/gen_common.py +++ b/qa/common/gen_common.py @@ -1,4 +1,4 @@ -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -101,6 +101,8 @@ def np_to_trt_dtype(np_dtype): return trt.int8 elif np_dtype == np.int32: return trt.int32 + elif np_dtype == np.int64: + return trt.int64 elif np_dtype == np.uint8: return trt.uint8 elif np_dtype == np.float16: diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py index 89d20df2e3d..bcb5a3a2b44 100755 --- a/qa/common/gen_qa_dyna_sequence_models.py +++ b/qa/common/gen_qa_dyna_sequence_models.py @@ -291,9 +291,11 @@ def create_tf_modelconfig( model_name, "tensorflow_savedmodel" if create_savedmodel else "tensorflow_graphdef", max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "fp32" if dtype == np.float32 else "int32", "fp32" if dtype == np.float32 else "int32", "fp32" if dtype == np.float32 else "int32", @@ -312,7 +314,7 @@ def create_tf_modelconfig( def create_plan_shape_tensor_modelfile( - models_dir, model_version, max_batch, dtype, shape + models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype ): # Note that resize layer does not support int tensors. # The model takes three inputs (INPUT, DUMMY_INPUT and SHAPE_INPUT) @@ -325,6 +327,7 @@ def create_plan_shape_tensor_modelfile( # SHAPE_OUTPUT : The shape values of resized output trt_dtype = np_to_trt_dtype(dtype) + trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR TRT_LOGGER = trt.Logger(trt.Logger.INFO) @@ -336,7 +339,7 @@ def create_plan_shape_tensor_modelfile( if max_batch != 0: in0 = network.add_input("INPUT", trt.int32, [-1] + dummy_shape) dummy_in0 = network.add_input("DUMMY_INPUT", trt_dtype, [-1] + dummy_shape) - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [1 + len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [1 + len(shape)]) start0 = network.add_input("START", trt.int32, [-1] + unit_shape) end0 = network.add_input("END", trt.int32, [-1] + unit_shape) ready0 = network.add_input("READY", trt.int32, [-1] + unit_shape) @@ -344,7 +347,7 @@ def create_plan_shape_tensor_modelfile( else: in0 = network.add_input("INPUT", trt.int32, dummy_shape) dummy_in0 = network.add_input("DUMMY_INPUT", trt_dtype, dummy_shape) - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [len(shape)]) start0 = network.add_input("START", trt.int32, unit_shape) end0 = network.add_input("END", trt.int32, unit_shape) ready0 = network.add_input("READY", trt.int32, unit_shape) @@ -453,6 +456,7 @@ def create_plan_shape_tensor_modelfile( model_name = tu.get_dyna_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name model_version_dir = models_dir + "/" + model_name + "/" + str(model_version) try: @@ -713,13 +717,17 @@ def create_plan_models(models_dir, model_version, max_batch, dtype, shape): create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape) -def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): +def create_plan_modelconfig( + models_dir, model_version, max_batch, dtype, shape, 
shape_tensor_input_dtype=None +): if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape): return model_name = tu.get_dyna_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + if shape_tensor_input_dtype: + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name config_dir = models_dir + "/" + model_name if FLAGS.tensorrt_shape_io: @@ -787,7 +795,7 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): input [ {{ name: "SHAPE_INPUT" - data_type: TYPE_INT32 + data_type: {} dims: [ {} ] is_shape_tensor: true }} @@ -822,15 +830,18 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32", "int32", "int32", tu.shape_to_dims_str(shape), np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), + np_to_model_dtype(shape_tensor_input_dtype), shape_tensor_dim, tu.shape_to_dims_str(shape), np_to_model_dtype(dtype), @@ -907,9 +918,11 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", @@ -1097,9 +1110,11 @@ def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape): """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), np_to_model_dtype(dtype), @@ -1237,9 +1252,11 @@ def create_libtorch_modelconfig(models_dir, model_version, max_batch, dtype, sha """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", @@ -1379,9 +1396,11 @@ def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, sha """.format( model_name, max_batch, - "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" - if max_batch > 0 - else "", + ( + "oldest { max_candidate_sequences: 6\npreferred_batch_size: [ 4 ]\nmax_queue_delay_microseconds: 0\n}" + if max_batch > 0 + else "" + ), "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", "int32" if dtype == np.int32 else "fp32", @@ -1399,14 +1418,24 @@ def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, sha cfile.write(config) -def create_shape_tensor_models(models_dir, dtype, 
shape, no_batch=True): +def create_shape_tensor_models( + models_dir, dtype, shape, shape_tensor_input_dtype, no_batch=True +): model_version = 1 - create_plan_modelconfig(models_dir, model_version, 8, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 8, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) if no_batch: - create_plan_modelconfig(models_dir, model_version, 0, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 0, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) def create_models(models_dir, dtype, shape, no_batch=True): @@ -1543,6 +1572,15 @@ def create_models(models_dir, dtype, shape, no_batch=True): [ -1, ], + np.int32, + ) + create_shape_tensor_models( + FLAGS.models_dir, + np.float32, + [ + -1, + ], + np.int64, ) else: # Tests with models that accept fixed-shape input/output tensors diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py index 277c5536ba4..7b7066dbb9f 100755 --- a/qa/common/gen_qa_identity_models.py +++ b/qa/common/gen_qa_identity_models.py @@ -762,7 +762,14 @@ def create_plan_dynamic_rf_modelfile( def create_plan_shape_tensor_modelfile( - models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size + models_dir, + model_version, + io_cnt, + max_batch, + dtype, + shape, + profile_max_size, + shape_tensor_input_dtype, ): # Note that resize layer does not support int tensors. 
# The model takes two inputs (INPUT and DUMMY_INPUT) @@ -785,10 +792,11 @@ def create_plan_shape_tensor_modelfile( dummy_shape = [-1] * shape_with_batchsize trt_dtype = np_to_trt_dtype(dtype) + trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR for io_num in range(io_cnt): in_node = network.add_input( - "INPUT{}".format(io_num), trt.int32, [shape_with_batchsize] + "INPUT{}".format(io_num), trt_shape_dtype, [shape_with_batchsize] ) in_node.allowed_formats = 1 << int(trt_memory_format) dummy_in_node = network.add_input( @@ -864,6 +872,7 @@ def create_plan_shape_tensor_modelfile( model_name = tu.get_zero_model_name( "plan_nobatch" if max_batch == 0 else "plan", io_cnt, dtype ) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name model_version_dir = os.path.join(models_dir, model_name, str(model_version)) os.makedirs(model_version_dir, exist_ok=True) @@ -941,7 +950,14 @@ def create_plan_dynamic_modelfile( def create_plan_modelconfig( - create_savedmodel, models_dir, model_version, io_cnt, max_batch, dtype, shape + create_savedmodel, + models_dir, + model_version, + io_cnt, + max_batch, + dtype, + shape, + shape_tensor_input_dtype=None, ): if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape): return @@ -954,6 +970,8 @@ def create_plan_modelconfig( if FLAGS.tensorrt_compat: model_name_base += "_compatible" model_name = tu.get_zero_model_name(model_name_base, io_cnt, dtype) + if shape_tensor_input_dtype: + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name config_dir = os.path.join(models_dir, model_name) if FLAGS.tensorrt_shape_io: @@ -976,7 +994,7 @@ def create_plan_modelconfig( }}, {{ name: "INPUT{}" - data_type: TYPE_INT32 + data_type: {} dims: [ {} ] is_shape_tensor: true }} @@ -999,6 +1017,7 @@ def create_plan_modelconfig( np_to_model_dtype(dtype), shape_str, io_num, + np_to_model_dtype(shape_tensor_input_dtype), shape_tensor_dim, io_num, np_to_model_dtype(dtype), @@ -1047,19 +1066,44 @@ def create_plan_modelconfig( cfile.write(config) -def create_shape_tensor_models(models_dir, dtype, shape, io_cnt=1, no_batch=True): +def create_shape_tensor_models( + models_dir, dtype, shape, shape_tensor_input_dtype, io_cnt=1, no_batch=True +): model_version = 1 - create_plan_modelconfig(True, models_dir, model_version, io_cnt, 8, dtype, shape) + create_plan_modelconfig( + True, + models_dir, + model_version, + io_cnt, + 8, + dtype, + shape, + shape_tensor_input_dtype, + ) create_plan_shape_tensor_modelfile( - models_dir, model_version, io_cnt, 8, dtype, shape, 32 + models_dir, model_version, io_cnt, 8, dtype, shape, 32, shape_tensor_input_dtype ) if no_batch: create_plan_modelconfig( - True, models_dir, model_version, io_cnt, 0, dtype, shape + True, + models_dir, + model_version, + io_cnt, + 0, + dtype, + shape, + shape_tensor_input_dtype, ) create_plan_shape_tensor_modelfile( - models_dir, model_version, io_cnt, 0, dtype, shape, 32 + models_dir, + model_version, + io_cnt, + 0, + dtype, + shape, + 32, + shape_tensor_input_dtype, ) @@ -1286,7 +1330,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True): elif FLAGS.tensorrt_compat: create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1, no_batch=False) elif FLAGS.tensorrt_shape_io: - create_shape_tensor_models(FLAGS.models_dir, np.float32, [-1, -1], io_cnt=1) + create_shape_tensor_models( + FLAGS.models_dir, np.float32, [-1, -1], np.int32, io_cnt=1 + ) + create_shape_tensor_models( + FLAGS.models_dir, np.float32, 
[-1, -1], np.int64, io_cnt=1 + ) else: create_models(FLAGS.models_dir, bool, [-1], io_cnt=1) create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1) diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py index 9bf63518f13..42557c7747b 100755 --- a/qa/common/gen_qa_sequence_models.py +++ b/qa/common/gen_qa_sequence_models.py @@ -296,7 +296,7 @@ def create_tf_modelconfig( def create_plan_shape_tensor_modelfile( - models_dir, model_version, max_batch, dtype, shape + models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype ): # Note that resize layer does not support int tensors. # The model takes two inputs (INPUT and SHAPE_INPUT) @@ -308,6 +308,7 @@ def create_plan_shape_tensor_modelfile( # SHAPE_OUTPUT : The shape values of resized output trt_dtype = np_to_trt_dtype(dtype) + trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR TRT_LOGGER = trt.Logger(trt.Logger.INFO) @@ -316,12 +317,12 @@ def create_plan_shape_tensor_modelfile( unit_shape = [1] * len(shape) if max_batch != 0: - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [1 + len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [1 + len(shape)]) in0 = network.add_input("INPUT", trt_dtype, [-1] + shape) start0 = network.add_input("START", trt_dtype, [-1] + unit_shape) ready0 = network.add_input("READY", trt_dtype, [-1] + unit_shape) else: - shape_in0 = network.add_input("SHAPE_INPUT", trt.int32, [len(shape)]) + shape_in0 = network.add_input("SHAPE_INPUT", trt_shape_dtype, [len(shape)]) in0 = network.add_input("INPUT", trt_dtype, shape) start0 = network.add_input("START", trt_dtype, unit_shape) ready0 = network.add_input("READY", trt_dtype, unit_shape) @@ -416,6 +417,7 @@ def create_plan_shape_tensor_modelfile( model_name = tu.get_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name model_version_dir = models_dir + "/" + model_name + "/" + str(model_version) try: @@ -637,13 +639,18 @@ def create_plan_models(models_dir, model_version, max_batch, dtype, shape): create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape) -def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): +def create_plan_modelconfig( + models_dir, model_version, max_batch, dtype, shape, shape_tensor_input_dtype=None +): if not tu.validate_for_trt_model(dtype, dtype, dtype, shape, shape, shape): return model_name = tu.get_sequence_model_name( "plan_nobatch" if max_batch == 0 else "plan", dtype ) + if shape_tensor_input_dtype: + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name + config_dir = models_dir + "/" + model_name if FLAGS.tensorrt_shape_io: shape_tensor_dim = len(shape) @@ -684,7 +691,7 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): input [ {{ name: "SHAPE_INPUT" - data_type: TYPE_INT32 + data_type: {} dims: [ {} ] is_shape_tensor: true }} @@ -723,6 +730,7 @@ def create_plan_modelconfig(models_dir, model_version, max_batch, dtype, shape): "int32" if dtype == np.int32 else "fp32", np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), + np_to_model_dtype(shape_tensor_input_dtype), shape_tensor_dim, np_to_model_dtype(dtype), tu.shape_to_dims_str(shape), @@ -1234,14 +1242,24 @@ def create_openvino_modelconfig(models_dir, model_version, max_batch, dtype, sha cfile.write(config) -def create_shape_tensor_models(models_dir, dtype, shape, 
no_batch=True): +def create_shape_tensor_models( + models_dir, dtype, shape, shape_tensor_input_dtype, no_batch=True +): model_version = 1 - create_plan_modelconfig(models_dir, model_version, 8, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 8, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 8, dtype, shape, shape_tensor_input_dtype + ) if no_batch: - create_plan_modelconfig(models_dir, model_version, 0, dtype, shape) - create_plan_shape_tensor_modelfile(models_dir, model_version, 0, dtype, shape) + create_plan_modelconfig( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) + create_plan_shape_tensor_modelfile( + models_dir, model_version, 0, dtype, shape, shape_tensor_input_dtype + ) def create_models(models_dir, dtype, shape, no_batch=True): @@ -1415,6 +1433,15 @@ def create_models(models_dir, dtype, shape, no_batch=True): [ -1, ], + np.int32, + ) + create_shape_tensor_models( + FLAGS.models_dir, + np.float32, + [ + -1, + ], + np.int64, ) else: # Tests with models that accept fixed-shape input/output tensors diff --git a/qa/common/infer_util.py b/qa/common/infer_util.py index f724a90cad9..4e19b5733c8 100755 --- a/qa/common/infer_util.py +++ b/qa/common/infer_util.py @@ -729,6 +729,7 @@ def infer_shape_tensor( priority=0, timeout_us=0, batch_size=1, + shape_tensor_input_dtype=np.int32, ): # Lazy shm imports... if use_system_shared_memory: @@ -784,7 +785,7 @@ def infer_shape_tensor( dummy_input_list.append(dummy_in0) # Prepare shape input tensor - in0 = np.asarray(input_shape_values[io_num], dtype=np.int32) + in0 = np.asarray(input_shape_values[io_num], dtype=shape_tensor_input_dtype) input_list.append(in0) # Prepare the expected value for the output. Skip dummy output as we @@ -792,12 +793,14 @@ def infer_shape_tensor( expected_dict[output_name] = np.ndarray.copy(in0) # Only need to create region once - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors - # hence there is a multiple of 2 to compute the byte size - # properly. - input_byte_size = in0.size * np.dtype(np.int32).itemsize - output_byte_size = input_byte_size * batch_size * 2 + input_byte_size = in0.size * np.dtype(shape_tensor_input_dtype).itemsize + output_byte_size = input_byte_size * batch_size + if shape_tensor_input_dtype == np.int32: + # Currently in our test cases we are + # using int64 outputs for shape tensors + # hence there is a multiple of 2 to compute the byte size + # properly. 
+ output_byte_size = output_byte_size * 2 if use_system_shared_memory: input_shm_handle_list.append( ( @@ -827,6 +830,7 @@ def infer_shape_tensor( ) model_name = tu.get_zero_model_name(pf, io_cnt, tensor_dtype) + model_name = model_name + "_" + np.dtype(shape_tensor_input_dtype).name # Run inference and check results for each config for config in configs: client_utils = grpcclient if config[1] == "grpc" else httpclient @@ -850,7 +854,11 @@ def infer_shape_tensor( ) ) inputs.append( - client_utils.InferInput(input_name, input_list[io_num].shape, "INT32") + client_utils.InferInput( + input_name, + input_list[io_num].shape, + np_to_triton_dtype(shape_tensor_input_dtype), + ) ) outputs.append(client_utils.InferRequestedOutput(dummy_output_name)) outputs.append(client_utils.InferRequestedOutput(output_name)) @@ -896,13 +904,30 @@ def infer_shape_tensor( if error is not None: raise error else: - results = triton_client.infer( - model_name, - inputs, - outputs=outputs, - priority=priority, - timeout=timeout_us, - ) + try: + results = triton_client.infer( + model_name, + inputs, + outputs=outputs, + priority=priority, + timeout=timeout_us, + ) + except Exception as e: + if use_system_shared_memory: + for io_num in range(io_cnt): + shm.destroy_shared_memory_region( + input_shm_handle_list[io_num][0] + ) + triton_client.unregister_system_shared_memory( + f"INPUT{io_num}" + shm_suffix + ) + shm.destroy_shared_memory_region( + output_shm_handle_list[io_num][0] + ) + triton_client.unregister_system_shared_memory( + f"OUTPUT{io_num}" + shm_suffix + ) + raise e for io_num in range(io_cnt): output_name = "OUTPUT{}".format(io_num) @@ -919,8 +944,8 @@ def infer_shape_tensor( output_shape = output.shape else: output_shape = output["shape"] - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors + # Currently in our test cases we are + # using int64 outputs for shape tensors # hence passing int64 as datatype. out = shm.get_contents_as_numpy( output_shm_handle_list[io_num][0], np.int64, output_shape diff --git a/qa/common/sequence_util.py b/qa/common/sequence_util.py index b331a7572f1..1b2560538d8 100755 --- a/qa/common/sequence_util.py +++ b/qa/common/sequence_util.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -195,7 +195,13 @@ def precreate_register_regions( # Returns (name, byte size, shm_handle) def precreate_register_shape_tensor_regions( - self, value_list, dtype, i, batch_size=1, tensor_shape=(1,) + self, + value_list, + dtype, + i, + batch_size=1, + tensor_shape=(1,), + shape_tensor_input_dtype=np.int32, ): self.assertFalse( _test_cuda_shared_memory, @@ -220,7 +226,7 @@ def precreate_register_shape_tensor_regions( # Only one shape tensor input per batch shape_input_list.append( - np.full(tensor_shape, shape_value, dtype=np.int32) + np.full(tensor_shape, shape_value, dtype=shape_tensor_input_dtype) ) if dtype == np.object_: @@ -233,11 +239,13 @@ def precreate_register_shape_tensor_regions( input_byte_size = sum([i0.nbytes for i0 in input_list_tmp]) shape_input_byte_size = sum([i0.nbytes for i0 in shape_input_list]) - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors - # hence there is a multiple of 2 to compute the byte size - # properly. - shape_output_byte_size = shape_input_byte_size * 2 + shape_output_byte_size = shape_input_byte_size + if shape_tensor_input_dtype == np.int32: + # Currently in our test cases we are + # using int64 outputs for shape tensors + # hence there is a multiple of 2 to compute the byte size + # properly. + shape_output_byte_size = shape_output_byte_size * 2 output_byte_size = np.dtype(dtype).itemsize + 2 resized_output_byte_size = 32 * shape_value @@ -298,7 +306,13 @@ def precreate_register_shape_tensor_regions( # Returns (name, byte size, shm_handle) def precreate_register_dynaseq_shape_tensor_regions( - self, value_list, dtype, i, batch_size=1, tensor_shape=(1,) + self, + value_list, + dtype, + i, + batch_size=1, + tensor_shape=(1,), + shape_tensor_input_dtype=np.int32, ): self.assertFalse( _test_cuda_shared_memory, @@ -326,7 +340,7 @@ def precreate_register_dynaseq_shape_tensor_regions( # Only one shape tensor input per batch shape_input_list.append( - np.full(tensor_shape, shape_value, dtype=np.int32) + np.full(tensor_shape, shape_value, dtype=shape_tensor_input_dtype) ) if dtype == np.object_: @@ -341,11 +355,13 @@ def precreate_register_dynaseq_shape_tensor_regions( dummy_input_byte_size = sum([i0.nbytes for i0 in dummy_input_list]) shape_input_byte_size = sum([i0.nbytes for i0 in shape_input_list]) - # FIXME DLIS-6653: Currently in our test cases we are - # using int32 inputs and int64 outputs for shape tensors - # hence there is a multiple of 2 to compute the byte size - # properly. - shape_output_byte_size = shape_input_byte_size * 2 + shape_output_byte_size = shape_input_byte_size + if shape_tensor_input_dtype == np.int32: + # Currently in our test cases we are + # using int64 outputs for shape tensors + # hence there is a multiple of 2 to compute the byte size + # properly. + shape_output_byte_size = shape_output_byte_size * 2 output_byte_size = np.dtype(np.int32).itemsize + 2 resized_output_byte_size = 32 * shape_value @@ -894,6 +910,7 @@ def check_sequence_shape_tensor_io( shm_region_handles, using_dynamic_batcher=False, sequence_name="", + shape_tensor_input_dtype=np.int32, ): """Perform sequence of inferences using async run. 
The 'values' holds a list of tuples, one for each inference with format: @@ -943,7 +960,9 @@ def check_sequence_shape_tensor_io( ) inputs.append( client_utils.InferInput( - "SHAPE_INPUT", shape_tensor_shape, np_to_triton_dtype(np.int32) + "SHAPE_INPUT", + shape_tensor_shape, + np_to_triton_dtype(shape_tensor_input_dtype), ) ) if using_dynamic_batcher: @@ -959,7 +978,9 @@ def check_sequence_shape_tensor_io( # Set IO values shape_values.append( - np.full(shape_tensor_shape, shape_value, dtype=np.int32) + np.full( + shape_tensor_shape, shape_value, dtype=shape_tensor_input_dtype + ) ) if not _test_system_shared_memory: if using_dynamic_batcher:
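
The hunks above thread the new shape_tensor_input_dtype argument through the QA model generators and the client-side test utilities: every shape-tensor PLAN model is now generated in both an int32 and an int64 variant, the variant is appended to the model name, and the shared-memory byte-size math accounts for shape-tensor outputs being read back as int64. A minimal Python sketch of those two conventions follows; the base name "plan_zero_1_float32", the example shape values, and the batch size are assumptions chosen for illustration (the generators derive the real name via tu.get_zero_model_name), not part of the patch.

import numpy as np

# Sketch, assuming the base name produced by tu.get_zero_model_name(...) is
# "plan_zero_1_float32". The shape-tensor input dtype is appended so each
# model exists as an int32 and an int64 variant.
base_name = "plan_zero_1_float32"
for shape_tensor_input_dtype in (np.int32, np.int64):
    model_name = base_name + "_" + np.dtype(shape_tensor_input_dtype).name
    # -> "plan_zero_1_float32_int32" and "plan_zero_1_float32_int64"

    # Shape-tensor outputs are read back as int64, so when the shape input is
    # int32 the output shared-memory region must be twice the input byte size
    # (4-byte elements in, 8-byte elements out).
    in0 = np.asarray([4, 4], dtype=shape_tensor_input_dtype)  # assumed shape values
    batch_size = 8                                            # assumed batch size
    input_byte_size = in0.size * np.dtype(shape_tensor_input_dtype).itemsize
    output_byte_size = input_byte_size * batch_size
    if shape_tensor_input_dtype == np.int32:
        output_byte_size *= 2
    print(model_name, input_byte_size, output_byte_size)

The same doubling rule appears in infer_util.py and sequence_util.py wherever an int32 shape input is paired with an int64 output region, while the int64 variants use the input byte size directly.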