From 2f4bfae460a7026a7ec3ca23890a6999053e97da Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Mon, 3 Jun 2024 20:01:41 -0700 Subject: [PATCH 01/10] Fix byte size handling for raw binary requests where size and data are split across buffers --- src/infer_request.cc | 98 +++++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 28 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 0c85051ff..34322294d 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1269,47 +1269,89 @@ InferenceRequest::ValidateBytesInputs( const std::string& input_id, const Input& input, int64_t* const expected_byte_size) const { + *expected_byte_size = 0; const auto& input_dims = input.ShapeWithBatchDim(); + int64_t element_count = triton::common::GetElementCount(input_dims); int64_t element_idx = 0; - *expected_byte_size = 0; - for (size_t i = 0; i < input.Data()->BufferCount(); ++i) { - size_t content_byte_size; - TRITONSERVER_MemoryType content_memory_type; - int64_t content_memory_id; - const char* content = input.Data()->BufferAt( - i, &content_byte_size, &content_memory_type, &content_memory_id); - - while (content_byte_size >= sizeof(uint32_t)) { - if (element_idx >= element_count) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "unexpected number of string elements " + - std::to_string(element_idx + 1) + " for inference input '" + - input_id + "', expecting " + std::to_string(element_count)); + size_t remaining_element_size = 0; + + size_t buffer_idx = 0; + const size_t buffer_count = input.Data()->BufferCount(); + + const char* buffer = nullptr; + size_t remaining_buffer_size = 0; + TRITONSERVER_MemoryType buffer_memory_type; + int64_t buffer_memory_id; + + // Validate elements until all buffers have been fully processed. + while (remaining_buffer_size || buffer_idx < buffer_count) { + // Get the next buffer if not currently processing one. + if (!remaining_buffer_size) { + // Reset remaining buffer size and pointers for next buffer. + buffer = input.Data()->BufferAt( + buffer_idx++, &remaining_buffer_size, &buffer_memory_type, + &buffer_memory_id); + *expected_byte_size += remaining_buffer_size; + + // FIXME: Handle GPU buffers + if (buffer_memory_type == TRITONSERVER_MEMORY_GPU) { + LOG_WARNING << "Validation of GPU byte size buffers is not implemented " + "yet, skipping to the next buffer."; + buffer = nullptr; + remaining_buffer_size = 0; + continue; } + } - const uint32_t len = *(reinterpret_cast(content)); - content += sizeof(uint32_t); - content_byte_size -= sizeof(uint32_t); - *expected_byte_size += sizeof(uint32_t); - - if (content_byte_size < len) { + constexpr size_t kElementSizeIndicator = sizeof(uint32_t); + // Get the next element if not currently processing one. + if (!remaining_element_size) { + // FIXME: Assume the string element's byte size indicator is not spread + // across buffer boundaries for simplicity. + if (remaining_buffer_size < kElementSizeIndicator) { return Status( Status::Code::INVALID_ARG, - LogRequest() + "incomplete string data for inference input '" + - input_id + "', expecting string of length " + - std::to_string(len) + " but only " + - std::to_string(content_byte_size) + " bytes available"); + LogRequest() + + "Element byte size indicator exceeds the end of the buffer."); } - content += len; - content_byte_size -= len; - *expected_byte_size += len; + // Start the next element and reset the remaining element size. 
+      remaining_element_size = *(reinterpret_cast<const uint32_t*>(buffer));
+      element_idx++;
+
+      // Advance pointer and remainder by the indicator size.
+      buffer += kElementSizeIndicator;
+      remaining_buffer_size -= kElementSizeIndicator;
+    }
+
+    // If the remaining buffer fits it: consume the rest of the element, proceed
+    // to the next element.
+    if (remaining_buffer_size >= remaining_element_size) {
+      buffer += remaining_element_size;
+      remaining_buffer_size -= remaining_element_size;
+      remaining_element_size = 0;
+    }
+    // Otherwise the remaining element is larger: consume the rest of the
+    // buffer, proceed to the next buffer.
+    else {
+      remaining_element_size -= remaining_buffer_size;
+      remaining_buffer_size = 0;
     }
   }
 
+  // Validate the number of processed buffers exactly match expectations.
+  if (buffer_idx != buffer_count) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        LogRequest() + "expected to process " + std::to_string(buffer_count) +
+            " buffers for inference input '" + input_id + "', only processed " +
+            std::to_string(buffer_idx));
+  }
+
+  // FIXME: If the input contains GPU buffers that get skipped, the element
+  // count will likely not match expectations.
+  // Validate the number of processed elements exactly match expectations.
   if (element_idx != element_count) {
     return Status(
         Status::Code::INVALID_ARG,

From 0a79085e98e8eecd149fa21b952885d11bb0ebd7 Mon Sep 17 00:00:00 2001
From: Ryan McCormick
Date: Mon, 3 Jun 2024 20:27:53 -0700
Subject: [PATCH 02/10] Be more explicit about unhandled case and just skip validation for GPU buffers

---
 src/infer_request.cc | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/infer_request.cc b/src/infer_request.cc
index 34322294d..fb7dc14d5 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -1186,6 +1186,11 @@ InferenceRequest::Normalize()
     if (data_type == inference::DataType::TYPE_STRING) {
       RETURN_IF_ERROR(
           ValidateBytesInputs(input_id, input, &expected_byte_size));
+      // FIXME: -1 is used as a signal to skip total byte size validation for
+      // unhandled cases in ValidateBytesInputs.
+      if (expected_byte_size == -1) {
+        return Status::Success;
+      }
     } else {
       expected_byte_size = triton::common::GetByteSize(data_type, input_dims);
     }
@@ -1294,13 +1299,15 @@ InferenceRequest::ValidateBytesInputs(
           &buffer_memory_id);
       *expected_byte_size += remaining_buffer_size;
 
-      // FIXME: Handle GPU buffers
+      // FIXME: Skip GPU buffers for now, return an expected_byte_size of -1 as
+      // a signal to skip validation.
       if (buffer_memory_type == TRITONSERVER_MEMORY_GPU) {
-        LOG_WARNING << "Validation of GPU byte size buffers is not implemented "
-                       "yet, skipping to the next buffer.";
-        buffer = nullptr;
-        remaining_buffer_size = 0;
-        continue;
+        LOG_WARNING << LogRequest()
+                    << "Validation of GPU byte size buffers is not implemented "
+                       "yet, skipping validation for input: "
+                    << input_id;
+        *expected_byte_size = -1;
+        return Status::Success;
       }
     }
@@ -1349,8 +1356,6 @@ InferenceRequest::ValidateBytesInputs(
             std::to_string(buffer_idx));
   }
 
-  // FIXME: If the input contains GPU buffers that get skipped, the element
-  // count will likely not match expectations.
   // Validate the number of processed elements exactly match expectations.
if (element_idx != element_count) { return Status( From 70220019b67109bb87809c71a5f0208e0ba5a6f2 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Tue, 4 Jun 2024 15:37:19 -0700 Subject: [PATCH 03/10] Remove warning log, don't return early --- src/infer_request.cc | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index fb7dc14d5..b280e5593 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1180,6 +1180,7 @@ InferenceRequest::Normalize() const auto& data_type = input.DType(); const auto& input_dims = input.ShapeWithBatchDim(); int64_t expected_byte_size = INT_MAX; + bool skip_byte_size_check = false; // Because Triton expects STRING type to be in special format // (prepend 4 bytes to specify string length), so need to add all the // first 4 bytes for each element to find expected byte size @@ -1188,14 +1189,15 @@ InferenceRequest::Normalize() ValidateBytesInputs(input_id, input, &expected_byte_size)); // FIXME: -1 is used as a signal to skip total byte size validation for // unhandled cases in ValidateBytesInputs. - if (expected_byte_size == -1) { - return Status::Success; - } + skip_byte_size_check = (expected_byte_size == -1); } else { expected_byte_size = triton::common::GetByteSize(data_type, input_dims); } - if ((byte_size > INT_MAX) || - (static_cast(byte_size) != expected_byte_size)) { + + bool byte_size_valid = + (byte_size > INT_MAX) || + (static_cast(byte_size) != expected_byte_size); + if (!skip_byte_size_check && byte_size_valid) { return Status( Status::Code::INVALID_ARG, LogRequest() + "input byte size mismatch for input '" + input_id + @@ -1302,10 +1304,6 @@ InferenceRequest::ValidateBytesInputs( // FIXME: Skip GPU buffers for now, return an expected_byte_size of -1 as // a signal to skip validation. if (buffer_memory_type == TRITONSERVER_MEMORY_GPU) { - LOG_WARNING << LogRequest() - << "Validation of GPU byte size buffers is not implemented " - "yet, skipping validation for input: " - << input_id; *expected_byte_size = -1; return Status::Success; } From ef9daa26866d7f1d2b3fab040b081d5e7c103818 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Tue, 4 Jun 2024 19:30:32 -0700 Subject: [PATCH 04/10] Update input calls --- src/infer_request.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index b280e5593..0ab0a2970 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1284,7 +1284,7 @@ InferenceRequest::ValidateBytesInputs( size_t remaining_element_size = 0; size_t buffer_idx = 0; - const size_t buffer_count = input.Data()->BufferCount(); + const size_t buffer_count = input.DataBufferCount(); const char* buffer = nullptr; size_t remaining_buffer_size = 0; @@ -1296,9 +1296,9 @@ InferenceRequest::ValidateBytesInputs( // Get the next buffer if not currently processing one. if (!remaining_buffer_size) { // Reset remaining buffer size and pointers for next buffer. 
- buffer = input.Data()->BufferAt( - buffer_idx++, &remaining_buffer_size, &buffer_memory_type, - &buffer_memory_id); + RETURN_IF_ERROR(input.DataBuffer( + buffer_idx++, (const void**)(&buffer), &remaining_buffer_size, + &buffer_memory_type, &buffer_memory_id)); *expected_byte_size += remaining_buffer_size; // FIXME: Skip GPU buffers for now, return an expected_byte_size of -1 as From 6a171fcef1b99406481e061c077e5af717ca747b Mon Sep 17 00:00:00 2001 From: Yingge He Date: Wed, 5 Jun 2024 01:23:07 -0700 Subject: [PATCH 05/10] update function ValidateBytesInputs --- src/infer_request.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 0ab0a2970..0934fc8fd 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1301,6 +1301,13 @@ InferenceRequest::ValidateBytesInputs( &buffer_memory_type, &buffer_memory_id)); *expected_byte_size += remaining_buffer_size; + if (buffer_idx > buffer_count) { + return Status( + Status::Code::INVALID_ARG, + LogRequest() + + "element strings exceed the end of the last buffer."); + } + // FIXME: Skip GPU buffers for now, return an expected_byte_size of -1 as // a signal to skip validation. if (buffer_memory_type == TRITONSERVER_MEMORY_GPU) { @@ -1318,7 +1325,7 @@ InferenceRequest::ValidateBytesInputs( return Status( Status::Code::INVALID_ARG, LogRequest() + - "Element byte size indicator exceeds the end of the buffer."); + "element byte size indicator exceeds the end of the buffer."); } // Start the next element and reset the remaining element size. @@ -1349,8 +1356,8 @@ InferenceRequest::ValidateBytesInputs( if (buffer_idx != buffer_count) { return Status( Status::Code::INVALID_ARG, - LogRequest() + "expected to process " + std::to_string(buffer_count) + - " buffers for inference input '" + input_id + "', only processed " + + LogRequest() + "expected " + std::to_string(buffer_count) + + " buffers for inference input '" + input_id + "', got " + std::to_string(buffer_idx)); } @@ -1359,7 +1366,7 @@ InferenceRequest::ValidateBytesInputs( return Status( Status::Code::INVALID_ARG, LogRequest() + "expected " + std::to_string(element_count) + - " strings for inference input '" + input_id + "', got " + + " string elements for inference input '" + input_id + "', got " + std::to_string(element_idx)); } From e8656fa87664388d81a2c08f0e73bac6be767f5d Mon Sep 17 00:00:00 2001 From: Yingge He Date: Wed, 5 Jun 2024 08:43:54 -0700 Subject: [PATCH 06/10] skip input byte-size checks for TensorRT --- src/infer_request.cc | 66 +++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 0934fc8fd..9d98212a0 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1180,30 +1180,40 @@ InferenceRequest::Normalize() const auto& data_type = input.DType(); const auto& input_dims = input.ShapeWithBatchDim(); int64_t expected_byte_size = INT_MAX; + + // Skip byte size validation for TensorRT backend because it breaks + // shape-size assumption bool skip_byte_size_check = false; - // Because Triton expects STRING type to be in special format - // (prepend 4 bytes to specify string length), so need to add all the - // first 4 bytes for each element to find expected byte size - if (data_type == inference::DataType::TYPE_STRING) { - RETURN_IF_ERROR( - ValidateBytesInputs(input_id, input, &expected_byte_size)); - // FIXME: -1 is used as a signal to skip total byte size validation for - // unhandled cases 
in ValidateBytesInputs. - skip_byte_size_check = (expected_byte_size == -1); - } else { - expected_byte_size = triton::common::GetByteSize(data_type, input_dims); - } + constexpr char trt_prefix[] = "tensorrt_"; + const std::string& platform = model_raw_->Config().platform(); + skip_byte_size_check |= (platform.rfind(trt_prefix) == 0); + + if (!skip_byte_size_check) { + // Because Triton expects STRING type to be in special format + // (prepend 4 bytes to specify string length), so need to add all the + // first 4 bytes for each element to find expected byte size + if (data_type == inference::DataType::TYPE_STRING) { + RETURN_IF_ERROR( + ValidateBytesInputs(input_id, input, &expected_byte_size)); + // FIXME: -1 is used as a signal to skip total byte size validation + // for unhandled cases in ValidateBytesInputs. + skip_byte_size_check |= (expected_byte_size == -1); + } else { + expected_byte_size = + triton::common::GetByteSize(data_type, input_dims); + } - bool byte_size_valid = - (byte_size > INT_MAX) || - (static_cast(byte_size) != expected_byte_size); - if (!skip_byte_size_check && byte_size_valid) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input byte size mismatch for input '" + input_id + - "' for model '" + ModelName() + "'. Expected " + - std::to_string(expected_byte_size) + ", got " + - std::to_string(byte_size)); + bool byte_size_valid = + (byte_size > INT_MAX) || + (static_cast(byte_size) != expected_byte_size); + if (!skip_byte_size_check && byte_size_valid) { + return Status( + Status::Code::INVALID_ARG, + LogRequest() + "input byte size mismatch for input '" + input_id + + "' for model '" + ModelName() + "'. Expected " + + std::to_string(expected_byte_size) + ", got " + + std::to_string(byte_size)); + } } } } @@ -1283,7 +1293,7 @@ InferenceRequest::ValidateBytesInputs( int64_t element_idx = 0; size_t remaining_element_size = 0; - size_t buffer_idx = 0; + size_t buffer_next_idx = 0; const size_t buffer_count = input.DataBufferCount(); const char* buffer = nullptr; @@ -1292,16 +1302,16 @@ InferenceRequest::ValidateBytesInputs( int64_t buffer_memory_id; // Validate elements until all buffers have been fully processed. - while (remaining_buffer_size || buffer_idx < buffer_count) { + while (remaining_buffer_size || buffer_next_idx < buffer_count) { // Get the next buffer if not currently processing one. if (!remaining_buffer_size) { // Reset remaining buffer size and pointers for next buffer. RETURN_IF_ERROR(input.DataBuffer( - buffer_idx++, (const void**)(&buffer), &remaining_buffer_size, + buffer_next_idx++, (const void**)(&buffer), &remaining_buffer_size, &buffer_memory_type, &buffer_memory_id)); *expected_byte_size += remaining_buffer_size; - if (buffer_idx > buffer_count) { + if (buffer_next_idx > buffer_count) { return Status( Status::Code::INVALID_ARG, LogRequest() + @@ -1353,12 +1363,12 @@ InferenceRequest::ValidateBytesInputs( } // Validate the number of processed buffers exactly match expectations. - if (buffer_idx != buffer_count) { + if (buffer_next_idx != buffer_count) { return Status( Status::Code::INVALID_ARG, LogRequest() + "expected " + std::to_string(buffer_count) + " buffers for inference input '" + input_id + "', got " + - std::to_string(buffer_idx)); + std::to_string(buffer_next_idx)); } // Validate the number of processed elements exactly match expectations. 
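
For context on what PATCH 01/10 through 06/10 are walking: Triton serializes TYPE_STRING (BYTES) tensors as a stream of elements, each a 4-byte length indicator followed by that many raw bytes, and with raw binary requests a single element's bytes can straddle two request buffers. The standalone sketch below is illustrative only; the names (ValidateBytesBuffers, kElementSizeIndicator, the vector-of-vectors buffer type) are stand-ins rather than Triton APIs. It mirrors the buffer-spanning loop introduced in PATCH 01/10 (later refined in PATCH 02-06), including that patch's stated assumption that the 4-byte indicator itself never straddles a buffer boundary.

// Standalone illustration of the buffer-spanning validation loop from PATCH
// 01/10. All names here are illustrative stand-ins, not Triton APIs.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Each buffer holds a chunk of the serialized BYTES tensor: a stream of
// [4-byte length][payload] records whose payloads may be split across chunks.
bool ValidateBytesBuffers(
    const std::vector<std::vector<char>>& buffers, int64_t expected_elements)
{
  constexpr size_t kElementSizeIndicator = sizeof(uint32_t);
  int64_t elements_seen = 0;
  size_t remaining_element_size = 0;

  size_t buffer_idx = 0;
  const char* cursor = nullptr;
  size_t remaining_buffer_size = 0;

  while (remaining_buffer_size > 0 || buffer_idx < buffers.size()) {
    // Fetch the next buffer once the current one is exhausted.
    if (remaining_buffer_size == 0) {
      cursor = buffers[buffer_idx].data();
      remaining_buffer_size = buffers[buffer_idx].size();
      ++buffer_idx;
    }

    // Start a new element if the previous one has been fully consumed.
    if (remaining_element_size == 0) {
      // Like the patch, assume the 4-byte indicator never straddles a
      // buffer boundary.
      if (remaining_buffer_size < kElementSizeIndicator) {
        return false;  // indicator exceeds the end of the buffer
      }
      uint32_t len = 0;
      std::memcpy(&len, cursor, sizeof(len));
      remaining_element_size = len;
      ++elements_seen;
      cursor += kElementSizeIndicator;
      remaining_buffer_size -= kElementSizeIndicator;
    }

    // Consume as much of the current element as this buffer holds; anything
    // left over is expected at the start of the next buffer.
    const size_t consumed =
        std::min(remaining_element_size, remaining_buffer_size);
    cursor += consumed;
    remaining_buffer_size -= consumed;
    remaining_element_size -= consumed;
  }

  return (remaining_element_size == 0) && (elements_seen == expected_elements);
}

int main()
{
  // Serialize two string elements, then split the stream so the second
  // element's payload straddles the buffer boundary.
  std::vector<char> stream;
  for (const std::string& s : {std::string("abc"), std::string("defgh")}) {
    const uint32_t len = static_cast<uint32_t>(s.size());
    const char* p = reinterpret_cast<const char*>(&len);
    stream.insert(stream.end(), p, p + sizeof(len));
    stream.insert(stream.end(), s.begin(), s.end());
  }
  std::vector<std::vector<char>> buffers;
  buffers.emplace_back(stream.begin(), stream.begin() + 12);
  buffers.emplace_back(stream.begin() + 12, stream.end());
  std::cout << std::boolalpha << ValidateBytesBuffers(buffers, 2) << "\n";  // true
  return 0;
}
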
From a40429b33684d33495bdff77225a6d9f40b7b252 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Wed, 5 Jun 2024 10:17:38 -0700 Subject: [PATCH 07/10] Rename byte_size_valid to byte_size_invalid --- src/infer_request.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 9d98212a0..e16cf34d7 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1203,10 +1203,10 @@ InferenceRequest::Normalize() triton::common::GetByteSize(data_type, input_dims); } - bool byte_size_valid = + bool byte_size_invalid = (byte_size > INT_MAX) || (static_cast(byte_size) != expected_byte_size); - if (!skip_byte_size_check && byte_size_valid) { + if (!skip_byte_size_check && byte_size_invalid) { return Status( Status::Code::INVALID_ARG, LogRequest() + "input byte size mismatch for input '" + input_id + From 6037ab85200f3314335741fa8981f5f4ae8a5bc7 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Thu, 6 Jun 2024 11:51:02 -0700 Subject: [PATCH 08/10] Minor updates --- src/infer_request.cc | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index e16cf34d7..a0a8d4b53 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1181,23 +1181,26 @@ InferenceRequest::Normalize() const auto& input_dims = input.ShapeWithBatchDim(); int64_t expected_byte_size = INT_MAX; - // Skip byte size validation for TensorRT backend because it breaks - // shape-size assumption + // FIXME: Skip byte size validation for TensorRT backend because it breaks + // shape-size assumption. See DLIS-6805 for proper fix for TRT backend + // reformat_free tensors. bool skip_byte_size_check = false; constexpr char trt_prefix[] = "tensorrt_"; const std::string& platform = model_raw_->Config().platform(); skip_byte_size_check |= (platform.rfind(trt_prefix) == 0); if (!skip_byte_size_check) { + TRITONSERVER_MemoryType input_memory_type; // Because Triton expects STRING type to be in special format // (prepend 4 bytes to specify string length), so need to add all the // first 4 bytes for each element to find expected byte size if (data_type == inference::DataType::TYPE_STRING) { - RETURN_IF_ERROR( - ValidateBytesInputs(input_id, input, &expected_byte_size)); - // FIXME: -1 is used as a signal to skip total byte size validation - // for unhandled cases in ValidateBytesInputs. - skip_byte_size_check |= (expected_byte_size == -1); + RETURN_IF_ERROR(ValidateBytesInputs( + input_id, input, &expected_byte_size, &input_memory_type)); + // FIXME: Temporarily skips byte size checks for GPU tensors. See + // DLIS-6820. 
+ skip_byte_size_check |= + (input_memory_type == TRITONSERVER_MEMORY_GPU); } else { expected_byte_size = triton::common::GetByteSize(data_type, input_dims); @@ -1284,13 +1287,14 @@ InferenceRequest::ValidateRequestInputs() Status InferenceRequest::ValidateBytesInputs( const std::string& input_id, const Input& input, - int64_t* const expected_byte_size) const + int64_t* const expected_byte_size, + TRITONSERVER_MemoryType* buffer_memory_type) const { *expected_byte_size = 0; const auto& input_dims = input.ShapeWithBatchDim(); int64_t element_count = triton::common::GetElementCount(input_dims); - int64_t element_idx = 0; + int64_t element_checked = 0; size_t remaining_element_size = 0; size_t buffer_next_idx = 0; @@ -1298,7 +1302,6 @@ InferenceRequest::ValidateBytesInputs( const char* buffer = nullptr; size_t remaining_buffer_size = 0; - TRITONSERVER_MemoryType buffer_memory_type; int64_t buffer_memory_id; // Validate elements until all buffers have been fully processed. @@ -1308,20 +1311,12 @@ InferenceRequest::ValidateBytesInputs( // Reset remaining buffer size and pointers for next buffer. RETURN_IF_ERROR(input.DataBuffer( buffer_next_idx++, (const void**)(&buffer), &remaining_buffer_size, - &buffer_memory_type, &buffer_memory_id)); + buffer_memory_type, &buffer_memory_id)); *expected_byte_size += remaining_buffer_size; - if (buffer_next_idx > buffer_count) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + - "element strings exceed the end of the last buffer."); - } - // FIXME: Skip GPU buffers for now, return an expected_byte_size of -1 as // a signal to skip validation. - if (buffer_memory_type == TRITONSERVER_MEMORY_GPU) { - *expected_byte_size = -1; + if (*buffer_memory_type == TRITONSERVER_MEMORY_GPU) { return Status::Success; } } @@ -1340,7 +1335,7 @@ InferenceRequest::ValidateBytesInputs( // Start the next element and reset the remaining element size. remaining_element_size = *(reinterpret_cast(buffer)); - element_idx++; + element_checked++; // Advance pointer and remainder by the indicator size. buffer += kElementSizeIndicator; @@ -1372,12 +1367,12 @@ InferenceRequest::ValidateBytesInputs( } // Validate the number of processed elements exactly match expectations. 
- if (element_idx != element_count) { + if (element_checked != element_count) { return Status( Status::Code::INVALID_ARG, LogRequest() + "expected " + std::to_string(element_count) + " string elements for inference input '" + input_id + "', got " + - std::to_string(element_idx)); + std::to_string(element_checked)); } return Status::Success; From 272ef2905d9a2bcaa57215794ab98aa96a35cec9 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Thu, 6 Jun 2024 12:04:29 -0700 Subject: [PATCH 09/10] Update header --- src/infer_request.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/infer_request.h b/src/infer_request.h index c97ef8039..1dc0310cc 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -749,7 +749,8 @@ class InferenceRequest { Status ValidateBytesInputs( const std::string& input_id, const Input& input, - int64_t* const expected_byte_size) const; + int64_t* const expected_byte_size, + TRITONSERVER_MemoryType* buffer_memory_type) const; // Helpers for pending request metrics void IncrementPendingRequestCount(); From 0e3b63bda47477f2a5eede6bedd0c924bd72136c Mon Sep 17 00:00:00 2001 From: Yingge He Date: Thu, 6 Jun 2024 13:55:53 -0700 Subject: [PATCH 10/10] Remove redundant checks --- src/infer_request.cc | 36 ++++++++++++++---------------------- src/infer_request.h | 1 - 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index a0a8d4b53..4ea687538 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -1176,10 +1176,7 @@ InferenceRequest::Normalize() // Note: Since we're using normalized input.ShapeWithBatchDim() here, // make sure that all the normalization is before the check. { - const size_t& byte_size = input.Data()->TotalByteSize(); const auto& data_type = input.DType(); - const auto& input_dims = input.ShapeWithBatchDim(); - int64_t expected_byte_size = INT_MAX; // FIXME: Skip byte size validation for TensorRT backend because it breaks // shape-size assumption. See DLIS-6805 for proper fix for TRT backend @@ -1195,27 +1192,27 @@ InferenceRequest::Normalize() // (prepend 4 bytes to specify string length), so need to add all the // first 4 bytes for each element to find expected byte size if (data_type == inference::DataType::TYPE_STRING) { - RETURN_IF_ERROR(ValidateBytesInputs( - input_id, input, &expected_byte_size, &input_memory_type)); + RETURN_IF_ERROR( + ValidateBytesInputs(input_id, input, &input_memory_type)); // FIXME: Temporarily skips byte size checks for GPU tensors. See // DLIS-6820. skip_byte_size_check |= (input_memory_type == TRITONSERVER_MEMORY_GPU); } else { + const auto& input_dims = input.ShapeWithBatchDim(); + int64_t expected_byte_size = INT_MAX; expected_byte_size = triton::common::GetByteSize(data_type, input_dims); - } - - bool byte_size_invalid = - (byte_size > INT_MAX) || - (static_cast(byte_size) != expected_byte_size); - if (!skip_byte_size_check && byte_size_invalid) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input byte size mismatch for input '" + input_id + - "' for model '" + ModelName() + "'. Expected " + - std::to_string(expected_byte_size) + ", got " + - std::to_string(byte_size)); + const size_t& byte_size = input.Data()->TotalByteSize(); + if ((byte_size > INT_MAX) || + (static_cast(byte_size) != expected_byte_size)) { + return Status( + Status::Code::INVALID_ARG, + LogRequest() + "input byte size mismatch for input '" + + input_id + "' for model '" + ModelName() + "'. 
Expected " + + std::to_string(expected_byte_size) + ", got " + + std::to_string(byte_size)); + } } } } @@ -1287,10 +1284,8 @@ InferenceRequest::ValidateRequestInputs() Status InferenceRequest::ValidateBytesInputs( const std::string& input_id, const Input& input, - int64_t* const expected_byte_size, TRITONSERVER_MemoryType* buffer_memory_type) const { - *expected_byte_size = 0; const auto& input_dims = input.ShapeWithBatchDim(); int64_t element_count = triton::common::GetElementCount(input_dims); @@ -1312,10 +1307,7 @@ InferenceRequest::ValidateBytesInputs( RETURN_IF_ERROR(input.DataBuffer( buffer_next_idx++, (const void**)(&buffer), &remaining_buffer_size, buffer_memory_type, &buffer_memory_id)); - *expected_byte_size += remaining_buffer_size; - // FIXME: Skip GPU buffers for now, return an expected_byte_size of -1 as - // a signal to skip validation. if (*buffer_memory_type == TRITONSERVER_MEMORY_GPU) { return Status::Success; } diff --git a/src/infer_request.h b/src/infer_request.h index 1dc0310cc..37da42b02 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -749,7 +749,6 @@ class InferenceRequest { Status ValidateBytesInputs( const std::string& input_id, const Input& input, - int64_t* const expected_byte_size, TRITONSERVER_MemoryType* buffer_memory_type) const; // Helpers for pending request metrics
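
Taken together, the series leaves Normalize() with a byte-size policy that can be summarized as: skip the check entirely for TensorRT platforms (DLIS-6805), skip it for STRING inputs whose buffers live in GPU memory (DLIS-6820), and otherwise compare the request's total byte size against the shape-derived expectation for fixed-size data types. The condensed sketch below uses hypothetical names and simplified enums; it is not the Triton implementation, only a summary of the decision logic under those assumptions.

// Condensed, hypothetical summary of the final byte-size check policy.
// Enum values, helper names, and element sizes are illustrative stand-ins.
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

enum class MemoryType { kCpu, kGpu };
enum class DataType { kString, kFp32 };

// Whether the request's total byte size should be compared against the
// shape-derived expectation at all.
bool ShouldCheckByteSize(
    const std::string& platform, DataType dtype, MemoryType string_input_memory)
{
  // TensorRT reformat-free tensors break the shape-implies-size assumption
  // (DLIS-6805), so the whole check is skipped for tensorrt_* platforms.
  if (platform.rfind("tensorrt_", 0) == 0) {
    return false;
  }
  // Walking GPU-resident STRING buffers is not implemented yet (DLIS-6820).
  if (dtype == DataType::kString && string_input_memory == MemoryType::kGpu) {
    return false;
  }
  return true;
}

// For fixed-size types the expectation is element size times element count;
// STRING inputs are instead validated element-by-element (ValidateBytesInputs).
int64_t ExpectedByteSize(DataType dtype, const std::vector<int64_t>& shape_with_batch)
{
  const int64_t element_size = (dtype == DataType::kFp32) ? 4 : -1;
  const int64_t element_count = std::accumulate(
      shape_with_batch.begin(), shape_with_batch.end(), int64_t{1},
      std::multiplies<int64_t>());
  return element_size * element_count;
}

int main()
{
  std::cout << std::boolalpha
            << ShouldCheckByteSize("tensorrt_plan", DataType::kFp32, MemoryType::kCpu)
            << "\n"  // false: TensorRT platform
            << ShouldCheckByteSize("onnxruntime_onnx", DataType::kString, MemoryType::kGpu)
            << "\n"  // false: GPU-resident string buffers
            << ExpectedByteSize(DataType::kFp32, {8, 3, 224, 224}) << "\n";  // 8*3*224*224*4
  return 0;
}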