Skip to content

Commit

Permalink
Merge branch 'main' of github.com:triton-inference-server/server into yinggeh-DLIS-6658-input-byte-size-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
yinggeh committed Jun 25, 2024
2 parents 92c0422 + d5eb467 commit 1039879
Show file tree
Hide file tree
Showing 12 changed files with 1,026 additions and 722 deletions.
10 changes: 7 additions & 3 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,9 +1082,9 @@ def create_dockerfile_linux(
"""
if "tensorrtllm" in backends:
df += """
# Remove TRT contents that are not needed in runtime
RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig
RUN ldconfig
# Remove contents that are not needed in runtime
RUN ARCH="$(uname -i)" \\
&& rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\
&& rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\
Expand All @@ -1094,10 +1094,14 @@ def create_dockerfile_linux(
RUN python3 -m pip install --upgrade pip \\
&& pip3 install transformers
# Install TensorRT-LLM
# ldconfig for TRT-LLM
RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf
RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf
# Setuptools has breaking changes in version 70.0.0, so fix it to 69.5.1
# The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0
RUN pip3 install setuptools==69.5.1 grpcio-tools==1.64.0
ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
"""
with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
Expand Down
2 changes: 1 addition & 1 deletion qa/L0_model_config/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ for modelpath in \
autofill_noplatform/tensorrt/mixed_batch_hint_shape_values/1 \
autofill_noplatform_success/tensorrt/no_config_shape_tensor/1 ; do
mkdir -p $modelpath
cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32/1/model.plan \
cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32/1/model.plan \
$modelpath/.
done

Expand Down
4 changes: 2 additions & 2 deletions qa/L0_perf_analyzer/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/

# Copy shape tensor models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/

# Copying ensemble including a sequential model
cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR
Expand Down Expand Up @@ -564,7 +564,7 @@ for PROTOCOL in grpc http; do
# Shape tensor I/O model (server needs the shape tensor on the CPU)
for SHARED_MEMORY_TYPE in none system; do
set +e
$PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \
$PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \
--shape DUMMY_INPUT0:4,4 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -b 8 -s ${STABILITY_THRESHOLD} \
>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
Expand Down
6 changes: 3 additions & 3 deletions qa/L0_perf_analyzer_capi/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -73,7 +73,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/

# Copy shape tensor models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/

# Copying ensemble including a sequential model
cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR
Expand Down Expand Up @@ -201,7 +201,7 @@ if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
fi

# Shape tensor I/O model (server needs the shape tensor on the CPU)
$PERF_ANALYZER -v -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \
$PERF_ANALYZER -v -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \
--shape DUMMY_INPUT0:4,4 -p2000 -b 8 \
--service-kind=triton_c_api --model-repository=$DATADIR \
--triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
Expand Down
21 changes: 13 additions & 8 deletions qa/L0_trt_shape_tensors/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -127,10 +127,13 @@ else
fi

# Prepare the config file for dynamic batching tests
CONFIG_FILE="models/plan_zero_1_float32/config.pbtxt"
sed -i "s/^max_batch_size:.*/max_batch_size: 8/" $CONFIG_FILE && \
sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" $CONFIG_FILE && \
echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> $CONFIG_FILE
for dtype in int32 int64; do
CONFIG_FILE="models/plan_zero_1_float32_${dtype}/config.pbtxt"
sed -i "s/^max_batch_size:.*/max_batch_size: 8/" "$CONFIG_FILE"
sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" "$CONFIG_FILE"
echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >>"$CONFIG_FILE"
done

for i in \
test_dynamic_different_shape_values \
test_dynamic_identical_shape_values; do
Expand Down Expand Up @@ -202,9 +205,11 @@ for i in \
done

# Prepare the config file for dynamic sequence batching tests
CONFIG_FILE="models/plan_dyna_sequence_float32/config.pbtxt"
sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" $CONFIG_FILE && \
sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" $CONFIG_FILE
for dtype in int32 int64; do
CONFIG_FILE="models/plan_dyna_sequence_float32_${dtype}/config.pbtxt"
sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" "$CONFIG_FILE"
sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" "$CONFIG_FILE"
done

export NO_BATCHING=0

Expand Down
Loading

0 comments on commit 1039879

Please sign in to comment.