Skip to content

Commit

Permalink
Merge branch 'main' of github.com:triton-inference-server/server into yinggeh-DLIS-6658-input-byte-size-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
yinggeh committed Jun 25, 2024
2 parents 92c0422 + d5eb467 commit 1039879
Show file tree
Hide file tree
Showing 12 changed files with 1,026 additions and 722 deletions.
10 changes: 7 additions & 3 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,9 +1082,9 @@ def create_dockerfile_linux(
"""
if "tensorrtllm" in backends:
df += """
# Remove TRT contents that are not needed in runtime
RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig
RUN ldconfig
# Remove contents that are not needed in runtime
RUN ARCH="$(uname -i)" \\
&& rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\
&& rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\
Expand All @@ -1094,10 +1094,14 @@ def create_dockerfile_linux(
RUN python3 -m pip install --upgrade pip \\
&& pip3 install transformers
# Install TensorRT-LLM
# ldconfig for TRT-LLM
RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf
RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf
# Setuptools has breaking changes in version 70.0.0, so fix it to 69.5.1
# The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0
RUN pip3 install setuptools==69.5.1 grpcio-tools==1.64.0
ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
"""
with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
Expand Down
2 changes: 1 addition & 1 deletion qa/L0_model_config/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ for modelpath in \
autofill_noplatform/tensorrt/mixed_batch_hint_shape_values/1 \
autofill_noplatform_success/tensorrt/no_config_shape_tensor/1 ; do
mkdir -p $modelpath
cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32/1/model.plan \
cp /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32/1/model.plan \
$modelpath/.
done

Expand Down
4 changes: 2 additions & 2 deletions qa/L0_perf_analyzer/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/

# Copy shape tensor models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/

# Copying ensemble including a sequential model
cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR
Expand Down Expand Up @@ -564,7 +564,7 @@ for PROTOCOL in grpc http; do
# Shape tensor I/O model (server needs the shape tensor on the CPU)
for SHARED_MEMORY_TYPE in none system; do
set +e
$PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \
$PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \
--shape DUMMY_INPUT0:4,4 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -b 8 -s ${STABILITY_THRESHOLD} \
>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
Expand Down
6 changes: 3 additions & 3 deletions qa/L0_perf_analyzer_capi/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -73,7 +73,7 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphde
cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/

# Copy shape tensor models
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32 $DATADIR/
cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/

# Copying ensemble including a sequential model
cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR
Expand Down Expand Up @@ -201,7 +201,7 @@ if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
fi

# Shape tensor I/O model (server needs the shape tensor on the CPU)
$PERF_ANALYZER -v -m plan_zero_1_float32 --input-data=$SHAPETENSORADTAFILE \
$PERF_ANALYZER -v -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \
--shape DUMMY_INPUT0:4,4 -p2000 -b 8 \
--service-kind=triton_c_api --model-repository=$DATADIR \
--triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
Expand Down
21 changes: 13 additions & 8 deletions qa/L0_trt_shape_tensors/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -127,10 +127,13 @@ else
fi

# Prepare the config file for dynamic batching tests
CONFIG_FILE="models/plan_zero_1_float32/config.pbtxt"
sed -i "s/^max_batch_size:.*/max_batch_size: 8/" $CONFIG_FILE && \
sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" $CONFIG_FILE && \
echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >> $CONFIG_FILE
for dtype in int32 int64; do
CONFIG_FILE="models/plan_zero_1_float32_${dtype}/config.pbtxt"
sed -i "s/^max_batch_size:.*/max_batch_size: 8/" "$CONFIG_FILE"
sed -i "s/^version_policy:.*/version_policy: { specific { versions: [1] }}/" "$CONFIG_FILE"
echo "dynamic_batching { preferred_batch_size: [ 2, 6 ], max_queue_delay_microseconds: 10000000 }" >>"$CONFIG_FILE"
done

for i in \
test_dynamic_different_shape_values \
test_dynamic_identical_shape_values; do
Expand Down Expand Up @@ -202,9 +205,11 @@ for i in \
done

# Prepare the config file for dynamic sequence batching tests
CONFIG_FILE="models/plan_dyna_sequence_float32/config.pbtxt"
sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" $CONFIG_FILE && \
sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" $CONFIG_FILE
for dtype in int32 int64; do
CONFIG_FILE="models/plan_dyna_sequence_float32_${dtype}/config.pbtxt"
sed -i "s/max_candidate_sequences:.*/max_candidate_sequences:4/" "$CONFIG_FILE"
sed -i "s/max_queue_delay_microseconds:.*/max_queue_delay_microseconds:5000000/" "$CONFIG_FILE"
done

export NO_BATCHING=0

Expand Down
Loading

0 comments on commit 1039879

Please sign in to comment.