From 1dcf5bb9ac823c01f1c0b87dff51b82b60cda558 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 6 Sep 2023 08:33:51 -0700 Subject: [PATCH 01/12] Fix state transitions for re-running requests (#251) --- src/infer_request.cc | 79 +++++++++++++++------------------ src/infer_request.h | 6 +-- src/test/response_cache_test.cc | 2 - 3 files changed, 38 insertions(+), 49 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 803335bca..ef6abca78 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -106,75 +106,69 @@ InferenceRequest::InferenceRequest( : needs_normalization_(true), model_raw_(model), requested_model_version_(requested_model_version), flags_(0), correlation_id_(0), batch_size_(0), timeout_us_(0), collect_stats_(true), - state_(InferenceRequest::State::INITIALIZED), null_request_(false), - decrement_pending_count_(false) + state_(InferenceRequest::State::INITIALIZED), null_request_(false) { SetPriority(0); } -InferenceRequest::~InferenceRequest() -{ - // If request has been enqueued but hasn't started executing by destruction - // time, an error occurred and the pending request count will need to be - // decremented. - DecrementPendingRequestCount(); -} - - Status InferenceRequest::SetState(InferenceRequest::State new_state) { + LOG_VERBOSE(1) << LogRequest() << "Setting state from " << state_ << " to " + << new_state; // No-op if this is already the current state, or if this is a null request. if (new_state == state_ || null_request_) { return Status::Success; } - // Allow RELEASED state transition from any state for now. - // Not all requests will follow linear transition, such as null requests - // used for padding batches, and ensemble requests. - if (new_state == InferenceRequest::State::RELEASED) { - state_ = new_state; - return Status::Success; - } - // Generate error when called rather than copying it into every case below. const auto generate_error = [&]() { std::stringstream ss; ss << LogRequest() << "Invalid request state transition from " << state_ << " to " << new_state; - return Status(Status::Code::INVALID_ARG, ss.str()); + return Status(Status::Code::INTERNAL, ss.str()); }; // Define state transitions switch (state_) { case InferenceRequest::State::INITIALIZED: { - if (new_state != InferenceRequest::State::STARTED) { + if (new_state == InferenceRequest::State::PENDING) { + IncrementPendingRequestCount(); + } else if (new_state == InferenceRequest::State::RELEASED) { + // No-op when moving from initialized to released, just releasing early. + } else { return generate_error(); } - state_ = new_state; - IncrementPendingRequestCount(); break; } - case InferenceRequest::State::STARTED: { - if (new_state != InferenceRequest::State::EXECUTING) { + case InferenceRequest::State::PENDING: { + // Request may move from pending to either execution when scheduled to + // backend, or released early due to some error. + if (new_state == InferenceRequest::State::EXECUTING || + new_state == InferenceRequest::State::RELEASED) { + DecrementPendingRequestCount(); + } else { + // Unexpected state transition return generate_error(); } - state_ = new_state; - DecrementPendingRequestCount(); break; } case InferenceRequest::State::EXECUTING: { if (new_state != InferenceRequest::State::RELEASED) { return generate_error(); } - state_ = new_state; break; } case InferenceRequest::State::RELEASED: { - // No state transition currently supported after release. 
- return generate_error(); + if (new_state != InferenceRequest::State::INITIALIZED) { + // Only transition currently supported after release is to start over + // again, such as re-using request objects for multiple inferences. + return generate_error(); + } + break; } } + state_ = new_state; return Status::Success; } @@ -182,10 +176,11 @@ void InferenceRequest::IncrementPendingRequestCount() { #ifdef TRITON_ENABLE_METRICS + // Pending request count should always be 0 or 1 per-request. If a request + // increments the count, it should not be incremented again until decremented. auto reporter = model_raw_->MetricReporter(); if (reporter) { reporter->IncrementGauge(kPendingRequestMetric, 1); - decrement_pending_count_ = true; } #endif // TRITON_ENABLE_METRICS } @@ -194,13 +189,11 @@ void InferenceRequest::DecrementPendingRequestCount() { #ifdef TRITON_ENABLE_METRICS - // Only decrement if count has been incremented, and not already decremented. - if (decrement_pending_count_) { - auto reporter = model_raw_->MetricReporter(); - if (reporter) { - reporter->DecrementGauge(kPendingRequestMetric, 1); - } - decrement_pending_count_ = false; + // Pending request count should always be 0 or 1 per-request. A request should + // not decrement the count unless it has already been incremented. + auto reporter = model_raw_->MetricReporter(); + if (reporter) { + reporter->DecrementGauge(kPendingRequestMetric, 1); } #endif // TRITON_ENABLE_METRICS } @@ -376,7 +369,7 @@ InferenceRequest::OutputBufferProperties( Status InferenceRequest::Run(std::unique_ptr& request) { - RETURN_IF_ERROR(request->SetState(InferenceRequest::State::STARTED)); + RETURN_IF_ERROR(request->SetState(InferenceRequest::State::PENDING)); return request->model_raw_->Enqueue(request); } @@ -849,8 +842,10 @@ InferenceRequest::PrepareForInference() request_start_ns_ = 0; #endif // TRITON_ENABLE_STATS - LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this; + // Help enforce that PrepareForInference() is called prior to Run(). + RETURN_IF_ERROR(SetState(InferenceRequest::State::INITIALIZED)); + LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this; return Status::Success; } @@ -1580,8 +1575,8 @@ operator<<(std::ostream& out, const InferenceRequest::State& state) out << "INITIALIZED"; break; } - case InferenceRequest::State::STARTED: { - out << "STARTED"; + case InferenceRequest::State::PENDING: { + out << "PENDING"; break; } case InferenceRequest::State::EXECUTING: { diff --git a/src/infer_request.h b/src/infer_request.h index 97c56ba27..41e7deed8 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -63,7 +63,7 @@ class InferenceRequest { INITIALIZED, // The request has been enqueued, but is not yet executing. - STARTED, + PENDING, // The request has been picked up by a backend model instance for execution, // but hasn't been released yet. @@ -291,7 +291,6 @@ class InferenceRequest { const int64_t requested_model_version); InferenceRequest(Model* model, const int64_t requested_model_version); - ~InferenceRequest(); const std::string& ModelName() const; int64_t RequestedModelVersion() const { return requested_model_version_; } @@ -799,9 +798,6 @@ class InferenceRequest { // Whether this is a null request used for direct sequence batch padding or // not. 
bool null_request_; - // Catch-all to correctly decrement pending count if needed on destruction - // if request doesn't follow normal execution path (error, unused, ensembles) - bool decrement_pending_count_; }; std::ostream& operator<<(std::ostream& out, const InferenceRequest& request); diff --git a/src/test/response_cache_test.cc b/src/test/response_cache_test.cc index 2b40a04c5..dad7d0faf 100644 --- a/src/test/response_cache_test.cc +++ b/src/test/response_cache_test.cc @@ -66,8 +66,6 @@ InferenceRequest::InferenceRequest( response_factory_.reset(new InferenceResponseFactory()); } -InferenceRequest::~InferenceRequest() {} - InferenceRequest::Input::Input( const std::string& name, const inference::DataType datatype, const int64_t* shape, const uint64_t dim_count) From a8087100d000afab79fb3f395cff28720b710ad2 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Tue, 29 Aug 2023 17:01:13 -0400 Subject: [PATCH 02/12] Add backend/server APIs --- include/triton/core/tritonbackend.h | 27 ++++++++++++++++++++++++++- include/triton/core/tritonserver.h | 15 ++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index de0fffb8d..5ca49e743 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -94,7 +94,7 @@ struct TRITONBACKEND_Batcher; /// } /// #define TRITONBACKEND_API_VERSION_MAJOR 1 -#define TRITONBACKEND_API_VERSION_MINOR 15 +#define TRITONBACKEND_API_VERSION_MINOR 16 /// Get the TRITONBACKEND API version supported by Triton. This value /// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and @@ -375,6 +375,31 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_OutputBufferAttributes( TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestId( TRITONBACKEND_Request* request, const char** id); +/// Query whether the request is cancelled or not. +/// +/// If possible the backend should terminate any processing and +/// send an error response with cancelled status. +/// +/// \param request The inference request. +/// \param is_cancelled Returns true if the request is cancelled otherwise it +/// would return false. +/// \return a TRITONSERVER_Error indicating success or failure. +TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestIsCancelled( + TRITONBACKEND_Request* request, bool* is_cancelled); + +/// Query whether the response factory is cancelled or not. +/// +/// If possible the backend should terminate any processing and +/// send an error response with cancelled status. +/// +/// \param factory The response factory +/// \param is_cancelled Returns true if the request is cancelled otherwise it +/// would return false. +/// \return a TRITONSERVER_Error indicating success or failure. +TRITONBACKEND_DECLSPEC TRITONSERVER_Error* +TRITONBACKEND_ResponseFactoryIsCancelled( + TRITONBACKEND_ResponseFactory* factory, bool* is_cancelled); + /// Get the correlation ID of the request if it is an unsigned integer. /// Zero indicates that the request does not have a correlation ID. 
/// Returns failure if correlation ID for given request is not an unsigned diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h index c037242bb..425407ce3 100644 --- a/include/triton/core/tritonserver.h +++ b/include/triton/core/tritonserver.h @@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily; /// } /// #define TRITONSERVER_API_VERSION_MAJOR 1 -#define TRITONSERVER_API_VERSION_MINOR 24 +#define TRITONSERVER_API_VERSION_MINOR 25 /// Get the TRITONBACKEND API version supported by the Triton shared /// library. This value can be compared against the @@ -1091,6 +1091,19 @@ TRITONSERVER_InferenceRequestSetCorrelationIdString( struct TRITONSERVER_InferenceRequest* inference_request, const char* correlation_id); +/// Query whether the request is cancelled or not. +/// +/// If possible the backend should terminate any processing and +/// send an error response with cancelled status. +/// +/// \param inference_request The request object. +/// \param is_cancelled Returns whether the inference request is cancelled or not. +/// \return a TRITONSERVER_Error indicating success or failure. +TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* +TRITONSERVER_InferenceRequestIsCancelled( + struct TRITONSERVER_InferenceRequest* inference_request, + bool* is_cancelled); + /// Deprecated. See TRITONSERVER_InferenceRequestPriorityUInt64 instead. /// /// Get the priority for a request. The default is 0 indicating that From 30645d9b0432567edf1ed320bccdf31fcbeb39a7 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Wed, 30 Aug 2023 10:44:34 -0400 Subject: [PATCH 03/12] Implement the cancellation APIs --- include/triton/core/tritonbackend.h | 4 ++-- include/triton/core/tritonserver.h | 15 +++++++++++++-- src/backend_model.cc | 21 +++++++++++++++++++++ src/infer_request.h | 8 ++++++++ src/infer_response.h | 8 +++++++- src/tritonserver.cc | 21 +++++++++++++++++++++ src/tritonserver_stub.cc | 16 ++++++++++++++++ 7 files changed, 88 insertions(+), 5 deletions(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 5ca49e743..4b5808d60 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -377,7 +377,7 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestId( /// Query whether the request is cancelled or not. /// -/// If possible the backend should terminate any processing and +/// If possible the backend should terminate any processing and /// send an error response with cancelled status. /// /// \param request The inference request. @@ -389,7 +389,7 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestIsCancelled( /// Query whether the response factory is cancelled or not. /// -/// If possible the backend should terminate any processing and +/// If possible the backend should terminate any processing and /// send an error response with cancelled status. /// /// \param factory The response factory diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h index 425407ce3..e23c6819e 100644 --- a/include/triton/core/tritonserver.h +++ b/include/triton/core/tritonserver.h @@ -1091,13 +1091,24 @@ TRITONSERVER_InferenceRequestSetCorrelationIdString( struct TRITONSERVER_InferenceRequest* inference_request, const char* correlation_id); +/// Cancel an inference request. Requests are canceled on a best +/// effort basis and no guarantee is provided that cancelling a +/// request will result in early termination. +/// +/// \param inference_request The request object. 
+/// \return a TRITONSERVER_Error indicating success or failure. +TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* +TRITONSERVER_InferenceRequestCancel( + struct TRITONSERVER_InferenceRequest* inference_request); + /// Query whether the request is cancelled or not. /// -/// If possible the backend should terminate any processing and +/// If possible the backend should terminate any processing and /// send an error response with cancelled status. /// /// \param inference_request The request object. -/// \param is_cancelled Returns whether the inference request is cancelled or not. +/// \param is_cancelled Returns whether the inference request is cancelled or +/// not. /// \return a TRITONSERVER_Error indicating success or failure. TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceRequestIsCancelled( diff --git a/src/backend_model.cc b/src/backend_model.cc index 7a2fbfc7d..552e88a62 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -1017,6 +1017,16 @@ TRITONBACKEND_RequestFlags(TRITONBACKEND_Request* request, uint32_t* flags) return nullptr; // success } +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONBACKEND_RequestIsCancelled( + TRITONBACKEND_Request* request, bool* is_cancelled) +{ + InferenceRequest* tr = reinterpret_cast(request); + *is_cancelled = tr->IsCancelled(); + return nullptr; +} + + TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestCorrelationIdString( TRITONBACKEND_Request* request, const char** id) @@ -1365,6 +1375,17 @@ TRITONBACKEND_ResponseFactorySendFlags( return nullptr; // success } +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONBACKEND_ResponseFactoryIsCancelled( + TRITONBACKEND_ResponseFactory* factory, bool* is_cancelled) +{ + std::shared_ptr* response_factory = + reinterpret_cast*>(factory); + *is_cancelled = (*response_factory)->IsCancelled(); + return nullptr; // success +} + + /// /// TRITONBACKEND_Response /// diff --git a/src/infer_request.h b/src/infer_request.h index 41e7deed8..074cd2de0 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -679,6 +679,14 @@ class InferenceRequest { secondary_stats_aggregator_ = secondary_stats_aggregator; } + void Cancel() + { + response_factory_->Cancel(); + is_cancelled_ = true; + } + + bool IsCancelled() { return is_cancelled_; } + #endif // TRITON_ENABLE_STATS private: diff --git a/src/infer_response.h b/src/infer_response.h index 5632c0f84..b451dc2d4 100644 --- a/src/infer_response.h +++ b/src/infer_response.h @@ -59,10 +59,15 @@ class InferenceResponseFactory { std::unique_ptr&&, const uint32_t)>& delegator) : model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp), response_fn_(response_fn), - response_userp_(response_userp), response_delegator_(delegator) + response_userp_(response_userp), response_delegator_(delegator), + is_cancelled_(false) { } + void Cancel() { is_cancelled_ = true; } + + bool IsCancelled() { return is_cancelled_; } + const ResponseAllocator* Allocator() { return allocator_; } void* AllocatorUserp() { return alloc_userp_; } @@ -118,6 +123,7 @@ class InferenceResponseFactory { std::function&&, const uint32_t)> response_delegator_; + std::atomic is_cancelled_; #ifdef TRITON_ENABLE_TRACING // Inference trace associated with this response. 
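A backend that wants to honor cancellation can poll the new query between
units of work and stop early. Below is a minimal sketch of that pattern:
the work loop (kNumSteps, DoOneStep) is hypothetical, and the
TRITONSERVER_ERROR_CANCELLED code used for the early-out is only added
later in this series (patch 07); everything else is the API introduced
above.

#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"

// Hypothetical unit of work, e.g. one decoding step of an iterative model.
extern void DoOneStep(int step);
static constexpr int kNumSteps = 128;

TRITONSERVER_Error*
ExecuteWithCancellationChecks(TRITONBACKEND_Request* request)
{
  for (int step = 0; step < kNumSteps; ++step) {
    bool is_cancelled = false;
    TRITONSERVER_Error* err =
        TRITONBACKEND_RequestIsCancelled(request, &is_cancelled);
    if (err != nullptr) {
      return err;
    }
    if (is_cancelled) {
      // Terminate processing; the caller should turn this into an error
      // response with cancelled status.
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_CANCELLED, "request was cancelled");
    }
    DoOneStep(step);
  }
  return nullptr;  // success
}

Decoupled backends that release the request before responding can make the
same check through TRITONBACKEND_ResponseFactoryIsCancelled, since the
response factory carries the cancellation flag independently of the
request's lifetime.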
diff --git a/src/tritonserver.cc b/src/tritonserver.cc index c9fc49fc4..998c7a90d 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -1637,6 +1637,27 @@ TRITONSERVER_InferenceRequestFlags( return nullptr; // Success } +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONSERVER_InferenceRequestIsCancelled( + struct TRITONSERVER_InferenceRequest* inference_request, bool* is_cancelled) +{ + tc::InferenceRequest* lrequest = + reinterpret_cast(inference_request); + *is_cancelled = lrequest->IsCancelled(); + return nullptr; // Success +} + +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONSERVER_InferenceRequestCancel( + struct TRITONSERVER_InferenceRequest* inference_request) +{ + tc::InferenceRequest* lrequest = + reinterpret_cast(inference_request); + lrequest->Cancel(); + return nullptr; // Success +} + + TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetFlags( TRITONSERVER_InferenceRequest* inference_request, uint32_t flags) diff --git a/src/tritonserver_stub.cc b/src/tritonserver_stub.cc index b0081a0a2..141b38fa6 100644 --- a/src/tritonserver_stub.cc +++ b/src/tritonserver_stub.cc @@ -118,6 +118,14 @@ TRITONSERVER_ResponseAllocatorDelete() { } TRITONAPI_DECLSPEC void +TRITONSERVER_InferenceRequestIsCancelled() +{ +} +TRITONAPI_DECLSPEC void +TRITONSERVER_InferenceRequestCancel() +{ +} +TRITONAPI_DECLSPEC void TRITONSERVER_MessageNewFromSerializedJson() { } @@ -651,6 +659,14 @@ TRITONBACKEND_RequestId() { } TRITONAPI_DECLSPEC void +TRITONBACKEND_ResponseFactoryIsCancelled() +{ +} +TRITONAPI_DECLSPEC void +TRITONBACKEND_RequestIsCancelled() +{ +} +TRITONAPI_DECLSPEC void TRITONBACKEND_RequestCorrelationId() { } From 212d4fd3cae4e2110ee907dafe32f3f2f71e95ed Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Wed, 30 Aug 2023 14:42:10 -0400 Subject: [PATCH 04/12] Only store the state in response factory --- src/infer_request.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/infer_request.h b/src/infer_request.h index 074cd2de0..fef79fa5b 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -679,13 +679,9 @@ class InferenceRequest { secondary_stats_aggregator_ = secondary_stats_aggregator; } - void Cancel() - { - response_factory_->Cancel(); - is_cancelled_ = true; - } + void Cancel() { response_factory_->Cancel(); } - bool IsCancelled() { return is_cancelled_; } + bool IsCancelled() { return response_factory_->IsCancelled(); } #endif // TRITON_ENABLE_STATS From 8a6d9dfa5f1764c2b34d5683d91df391d90869cc Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Thu, 31 Aug 2023 00:43:27 -0400 Subject: [PATCH 05/12] Add unit testing for request cancellation --- src/test/CMakeLists.txt | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 7fe27f5c0..1b5dbc3ed 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -534,3 +534,43 @@ install( TARGETS register_api_test RUNTIME DESTINATION bin ) + +# +# Request Cancellation Unittest +# +add_executable( + request_cancellation_test + request_cancellation_test.cc +) + +set_target_properties( + request_cancellation_test + PROPERTIES + SKIP_BUILD_RPATH TRUE + BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH_USE_LINK_PATH FALSE + INSTALL_RPATH "" +) + +target_include_directories( + request_cancellation_test + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/.. 
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../include + ${GTEST_INCLUDE_DIRS} +) + +target_link_libraries( + request_cancellation_test + PRIVATE + triton-common-error # from repo-common + triton-common-logging # from repo-common + triton-core + GTest::gtest + GTest::gtest_main +) + +install( + TARGETS request_cancellation_test + RUNTIME DESTINATION bin +) From 691834bf1db5f5f37870368b1c431b19cd6c47d6 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Thu, 31 Aug 2023 12:05:58 -0400 Subject: [PATCH 06/12] Add test --- src/test/request_cancellation_test.cc | 150 ++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 src/test/request_cancellation_test.cc diff --git a/src/test/request_cancellation_test.cc b/src/test/request_cancellation_test.cc new file mode 100644 index 000000000..94b10efee --- /dev/null +++ b/src/test/request_cancellation_test.cc @@ -0,0 +1,150 @@ +// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include "gtest/gtest.h" +#include "triton/core/tritonbackend.h" +#include "triton/core/tritonserver.h" + + +#define FAIL_TEST_IF_ERR(X) \ + do { \ + std::shared_ptr err__((X), TRITONSERVER_ErrorDelete); \ + ASSERT_TRUE((err__ == nullptr)) \ + << TRITONSERVER_ErrorCodeString(err__.get()) << " - " \ + << TRITONSERVER_ErrorMessage(err__.get()); \ + } while (false) + + +class RequestCancellationTest : public ::testing::Test { + protected: + static void SetUpTestSuite() + { + // Create the server... 
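+    // SetUpTestSuite() runs once, so every test case in this suite
+    // shares this server instance; the options below assume a ./models
+    // repository relative to the working directory.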
+ TRITONSERVER_ServerOptions* server_options = nullptr; + FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsNew(&server_options)); + FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsSetModelRepositoryPath( + server_options, "./models")); + FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsSetBackendDirectory( + server_options, "/opt/tritonserver/backends")); + + FAIL_TEST_IF_ERR(TRITONSERVER_ServerNew(&server_, server_options)); + FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsDelete(server_options)); + } + + static void TearDownTestSuite() + { + FAIL_TEST_IF_ERR(TRITONSERVER_ServerDelete(server_)); + } + + void SetUp() override + { + ASSERT_TRUE(server_ != nullptr) << "Server has not created"; + // Wait until the server is both live and ready. + size_t health_iters = 0; + while (true) { + bool live, ready; + FAIL_TEST_IF_ERR(TRITONSERVER_ServerIsLive(server_, &live)); + FAIL_TEST_IF_ERR(TRITONSERVER_ServerIsReady(server_, &ready)); + if (live && ready) { + break; + } + + if (++health_iters >= 10) { + FAIL() << "failed to find healthy inference server"; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } + } + + + static TRITONSERVER_Server* server_; +}; + +TRITONSERVER_Server* RequestCancellationTest::server_ = nullptr; + + +TEST_F(RequestCancellationTest, Cancellation) +{ + TRITONSERVER_InferenceRequest* request; + FAIL_TEST_IF_ERR( + TRITONSERVER_InferenceRequestNew(&request, server_, "model", 1)); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetResponseCallback( + request, nullptr, nullptr, nullptr, nullptr)); + TRITONBACKEND_Request* backend_request = + reinterpret_cast(request); + TRITONBACKEND_ResponseFactory* response_factory; + FAIL_TEST_IF_ERR( + TRITONBACKEND_ResponseFactoryNew(&response_factory, backend_request)); + + bool is_cancelled = true; + FAIL_TEST_IF_ERR( + TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); + ASSERT_FALSE(is_cancelled); + + is_cancelled = true; + FAIL_TEST_IF_ERR( + TRITONBACKEND_RequestIsCancelled(backend_request, &is_cancelled)); + ASSERT_FALSE(is_cancelled); + + is_cancelled = true; + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryIsCancelled( + response_factory, &is_cancelled)); + ASSERT_FALSE(is_cancelled); + + is_cancelled = false; + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestCancel(request)); + + FAIL_TEST_IF_ERR( + TRITONBACKEND_RequestIsCancelled(backend_request, &is_cancelled)); + ASSERT_TRUE(is_cancelled); + + is_cancelled = false; + FAIL_TEST_IF_ERR( + TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); + ASSERT_TRUE(is_cancelled); + + is_cancelled = false; + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryIsCancelled( + response_factory, &is_cancelled)); + ASSERT_TRUE(is_cancelled); + + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryDelete(response_factory)); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestDelete(request)); +} + +int +main(int argc, char** argv) +{ +#ifdef TRITON_ENABLE_LOGGING + LOG_SET_VERBOSE(2); +#endif // TRITON_ENABLE_LOGGING + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From d33138698bc5e70bba171cde098b51c808ba1288 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Thu, 31 Aug 2023 13:35:18 -0400 Subject: [PATCH 07/12] Add cancellation status --- include/triton/core/tritonserver.h | 3 ++- src/status.cc | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h index e23c6819e..0b4e15bba 100644 --- a/include/triton/core/tritonserver.h +++ b/include/triton/core/tritonserver.h 
@@ -308,7 +308,8 @@ typedef enum TRITONSERVER_errorcode_enum { TRITONSERVER_ERROR_INVALID_ARG, TRITONSERVER_ERROR_UNAVAILABLE, TRITONSERVER_ERROR_UNSUPPORTED, - TRITONSERVER_ERROR_ALREADY_EXISTS + TRITONSERVER_ERROR_ALREADY_EXISTS, + TRITONSERVER_ERROR_CANCELLED } TRITONSERVER_Error_Code; /// Create a new error object. The caller takes ownership of the diff --git a/src/status.cc b/src/status.cc index 1640ee5ed..1344eeefc 100644 --- a/src/status.cc +++ b/src/status.cc @@ -48,7 +48,8 @@ TritonCodeToStatusCode(TRITONSERVER_Error_Code code) return Status::Code::UNSUPPORTED; case TRITONSERVER_ERROR_ALREADY_EXISTS: return Status::Code::ALREADY_EXISTS; - + case TRITONSERVER_ERROR_CANCELLED: + return Status::Code::CANCELLED; default: break; } @@ -74,7 +75,8 @@ StatusCodeToTritonCode(Status::Code status_code) return TRITONSERVER_ERROR_UNSUPPORTED; case Status::Code::ALREADY_EXISTS: return TRITONSERVER_ERROR_ALREADY_EXISTS; - + case Status::Code::CANCELLED: + return TRITONSERVER_ERROR_CANCELLED; default: break; } From f3dc75b9e59d911ad96d66a3fb03f954437b6e28 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Thu, 31 Aug 2023 15:03:19 -0400 Subject: [PATCH 08/12] Add testing for cancelling a request after release --- src/test/request_cancellation_test.cc | 51 +++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/test/request_cancellation_test.cc b/src/test/request_cancellation_test.cc index 94b10efee..cfb693636 100644 --- a/src/test/request_cancellation_test.cc +++ b/src/test/request_cancellation_test.cc @@ -88,6 +88,13 @@ class RequestCancellationTest : public ::testing::Test { TRITONSERVER_Server* RequestCancellationTest::server_ = nullptr; +void +ReleaseCallback( + struct TRITONSERVER_InferenceRequest* request, const uint32_t flags, + void* userp) +{ + // no-op +} TEST_F(RequestCancellationTest, Cancellation) { @@ -138,6 +145,50 @@ TEST_F(RequestCancellationTest, Cancellation) FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestDelete(request)); } +TEST_F(RequestCancellationTest, CancellationAfterRelease) +{ + TRITONSERVER_InferenceRequest* request; + FAIL_TEST_IF_ERR( + TRITONSERVER_InferenceRequestNew(&request, server_, "model", 1)); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetResponseCallback( + request, nullptr, nullptr, nullptr, nullptr)); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetReleaseCallback( + request, ReleaseCallback, nullptr)); + TRITONBACKEND_Request* backend_request = + reinterpret_cast(request); + TRITONBACKEND_ResponseFactory* response_factory; + FAIL_TEST_IF_ERR( + TRITONBACKEND_ResponseFactoryNew(&response_factory, backend_request)); + FAIL_TEST_IF_ERR(TRITONBACKEND_RequestRelease( + backend_request, TRITONSERVER_REQUEST_RELEASE_ALL)); + + bool is_cancelled = true; + FAIL_TEST_IF_ERR( + TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); + ASSERT_FALSE(is_cancelled); + + is_cancelled = true; + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryIsCancelled( + response_factory, &is_cancelled)); + ASSERT_FALSE(is_cancelled); + + is_cancelled = false; + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestCancel(request)); + + is_cancelled = false; + FAIL_TEST_IF_ERR( + TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); + ASSERT_TRUE(is_cancelled); + + is_cancelled = false; + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryIsCancelled( + response_factory, &is_cancelled)); + ASSERT_TRUE(is_cancelled); + + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryDelete(response_factory)); + 
FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestDelete(request)); +} + int main(int argc, char** argv) { From 2aabb8b0d32d09e56ae195bacc8f4e6c14e05b70 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Wed, 6 Sep 2023 11:29:07 -0400 Subject: [PATCH 09/12] Handle request re-use --- src/infer_request.h | 14 +- src/test/request_cancellation_test.cc | 181 ++++++++++++++++++++------ src/tritonserver.cc | 2 +- 3 files changed, 153 insertions(+), 44 deletions(-) diff --git a/src/infer_request.h b/src/infer_request.h index fef79fa5b..cbc10c977 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -679,7 +679,17 @@ class InferenceRequest { secondary_stats_aggregator_ = secondary_stats_aggregator; } - void Cancel() { response_factory_->Cancel(); } + Status Cancel() + { + if (state_ != InferenceRequest::State::INITIALIZED) { + response_factory_->Cancel(); + } else { + return Status( + Status::Code::INTERNAL, + "Request cannot be cancelled before it has started executing."); + } + return Status::Success; + } bool IsCancelled() { return response_factory_->IsCancelled(); } @@ -798,7 +808,7 @@ class InferenceRequest { std::shared_ptr sequence_states_; // The state of the request. - InferenceRequest::State state_; + std::atomic state_; // Whether this is a null request used for direct sequence batch padding or // not. bool null_request_; diff --git a/src/test/request_cancellation_test.cc b/src/test/request_cancellation_test.cc index cfb693636..84d865f90 100644 --- a/src/test/request_cancellation_test.cc +++ b/src/test/request_cancellation_test.cc @@ -24,6 +24,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include #include #include "gtest/gtest.h" @@ -40,6 +41,60 @@ } while (false) +TRITONSERVER_Error* +ResponseAlloc( + TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, + size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, + int64_t preferred_memory_type_id, void* userp, void** buffer, + void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, + int64_t* actual_memory_type_id) +{ + *actual_memory_type = TRITONSERVER_MEMORY_CPU; + *actual_memory_type_id = preferred_memory_type_id; + + if (byte_size == 0) { + *buffer = nullptr; + *buffer_userp = nullptr; + } else { + void* allocated_ptr = nullptr; + allocated_ptr = malloc(byte_size); + + if (allocated_ptr != nullptr) { + *buffer = allocated_ptr; + *buffer_userp = new std::string(tensor_name); + } + } + return nullptr; // Success +} + +TRITONSERVER_Error* +ResponseRelease( + TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, + size_t byte_size, TRITONSERVER_MemoryType memory_type, + int64_t memory_type_id) +{ + return nullptr; // Success +} + +void +InferRequestComplete( + TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) +{ +} + +void +InferResponseComplete( + TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp) +{ + if (response != nullptr) { + // Notify that the completion. 
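+    // The promise is heap-allocated by the test case and passed in via
+    // userp; fulfilling it unblocks the future the test thread waits
+    // on, after which the promise is freed here.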
+ std::promise* p = + reinterpret_cast*>(userp); + p->set_value(response); + delete p; + } +} + class RequestCancellationTest : public ::testing::Test { protected: static void SetUpTestSuite() @@ -51,6 +106,10 @@ class RequestCancellationTest : public ::testing::Test { server_options, "./models")); FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsSetBackendDirectory( server_options, "/opt/tritonserver/backends")); + FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + server_options, "/opt/tritonserver/repoagents")); + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true)); FAIL_TEST_IF_ERR(TRITONSERVER_ServerNew(&server_, server_options)); FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsDelete(server_options)); @@ -80,60 +139,74 @@ class RequestCancellationTest : public ::testing::Test { std::this_thread::sleep_for(std::chrono::milliseconds(500)); } + + // Create allocator with common callback + FAIL_TEST_IF_ERR(TRITONSERVER_ResponseAllocatorNew( + &allocator_, ResponseAlloc, ResponseRelease, nullptr /* start_fn */)); + + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestNew( + &irequest_, server_, "model", -1 /* model_version */)); + + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetReleaseCallback( + irequest_, InferRequestComplete, nullptr /* request_release_userp */)); + + std::vector input0_shape({1, 1000}); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestAddInput( + irequest_, "INPUT0", TRITONSERVER_TYPE_INT32, &input0_shape[0], + input0_shape.size())); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestAppendInputData( + irequest_, "INPUT0", &input0_data_[0], input0_data_.size(), + TRITONSERVER_MEMORY_CPU, 0)); } + void TearDown() override + { + FAIL_TEST_IF_ERR(TRITONSERVER_ResponseAllocatorDelete(allocator_)); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestDelete(irequest_)); + } static TRITONSERVER_Server* server_; + TRITONSERVER_ResponseAllocator* allocator_ = nullptr; + static std::vector input0_data_; + TRITONSERVER_InferenceRequest* irequest_ = nullptr; }; TRITONSERVER_Server* RequestCancellationTest::server_ = nullptr; - -void -ReleaseCallback( - struct TRITONSERVER_InferenceRequest* request, const uint32_t flags, - void* userp) -{ - // no-op -} +std::vector RequestCancellationTest::input0_data_(16, 1); TEST_F(RequestCancellationTest, Cancellation) { - TRITONSERVER_InferenceRequest* request; - FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestNew(&request, server_, "model", 1)); + auto p = new std::promise(); + std::future future = p->get_future(); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetResponseCallback( - request, nullptr, nullptr, nullptr, nullptr)); + irequest_, allocator_, nullptr /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p))); + TRITONBACKEND_Request* backend_request = - reinterpret_cast(request); + reinterpret_cast(irequest_); TRITONBACKEND_ResponseFactory* response_factory; FAIL_TEST_IF_ERR( TRITONBACKEND_ResponseFactoryNew(&response_factory, backend_request)); bool is_cancelled = true; FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); - ASSERT_FALSE(is_cancelled); - - is_cancelled = true; - FAIL_TEST_IF_ERR( - TRITONBACKEND_RequestIsCancelled(backend_request, &is_cancelled)); + TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); ASSERT_FALSE(is_cancelled); - is_cancelled = true; - FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryIsCancelled( - response_factory, &is_cancelled)); - ASSERT_FALSE(is_cancelled); - - is_cancelled = false; - 
FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestCancel(request)); + TRITONSERVER_Error* error = TRITONSERVER_InferenceRequestCancel(irequest_); + ASSERT_TRUE(error != nullptr); + ASSERT_TRUE( + std::string(TRITONSERVER_ErrorMessage(error)) == + "Request cannot be cancelled before it has started executing."); FAIL_TEST_IF_ERR( - TRITONBACKEND_RequestIsCancelled(backend_request, &is_cancelled)); - ASSERT_TRUE(is_cancelled); + TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr /* trace */)); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestCancel(irequest_)); is_cancelled = false; FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); + TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); ASSERT_TRUE(is_cancelled); is_cancelled = false; @@ -141,21 +214,48 @@ TEST_F(RequestCancellationTest, Cancellation) response_factory, &is_cancelled)); ASSERT_TRUE(is_cancelled); + TRITONSERVER_InferenceResponse* response = future.get(); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceResponseDelete(response)); FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryDelete(response_factory)); - FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestDelete(request)); + + // TODO: Enable after https://github.com/triton-inference-server/core/pull/251 + // is merged. Currently, it fails with "Invalid request state transition from + // EXECUTING to STARTED". + // p = new std::promise(); + // future = p->get_future(); + + // FAIL_TEST_IF_ERR( + // TRITONSERVER_InferenceRequestSetResponseCallback( + // irequest_, allocator_, nullptr /* response_allocator_userp */, + // InferResponseComplete, reinterpret_cast(p))); + // // Sending another request and the request should not be cancelled. + // FAIL_TEST_IF_ERR(TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr + // /* trace */)); + + // is_cancelled = true; + // FAIL_TEST_IF_ERR( + // TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); + // ASSERT_FALSE(is_cancelled); + + // is_cancelled = false; + // FAIL_TEST_IF_ERR( + // TRITONBACKEND_ResponseFactoryIsCancelled(response_factory, + // &is_cancelled)); + // ASSERT_FALSE(is_cancelled); + // future.get(); } TEST_F(RequestCancellationTest, CancellationAfterRelease) { - TRITONSERVER_InferenceRequest* request; - FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestNew(&request, server_, "model", 1)); + auto p = new std::promise(); + std::future future = p->get_future(); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetResponseCallback( - request, nullptr, nullptr, nullptr, nullptr)); - FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetReleaseCallback( - request, ReleaseCallback, nullptr)); + irequest_, allocator_, nullptr /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p))); + TRITONBACKEND_Request* backend_request = - reinterpret_cast(request); + reinterpret_cast(irequest_); TRITONBACKEND_ResponseFactory* response_factory; FAIL_TEST_IF_ERR( TRITONBACKEND_ResponseFactoryNew(&response_factory, backend_request)); @@ -164,7 +264,7 @@ TEST_F(RequestCancellationTest, CancellationAfterRelease) bool is_cancelled = true; FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); + TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); ASSERT_FALSE(is_cancelled); is_cancelled = true; @@ -173,11 +273,11 @@ TEST_F(RequestCancellationTest, CancellationAfterRelease) ASSERT_FALSE(is_cancelled); is_cancelled = false; - FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestCancel(request)); + 
FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestCancel(irequest_)); is_cancelled = false; FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled)); + TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); ASSERT_TRUE(is_cancelled); is_cancelled = false; @@ -186,7 +286,6 @@ TEST_F(RequestCancellationTest, CancellationAfterRelease) ASSERT_TRUE(is_cancelled); FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryDelete(response_factory)); - FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestDelete(request)); } int diff --git a/src/tritonserver.cc b/src/tritonserver.cc index 998c7a90d..4a3013be9 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -1653,7 +1653,7 @@ TRITONSERVER_InferenceRequestCancel( { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); - lrequest->Cancel(); + RETURN_IF_STATUS_ERROR(lrequest->Cancel()); return nullptr; // Success } From 3a6ce93053fed4fda35b0c444b7b5215c062a77e Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Wed, 6 Sep 2023 11:45:52 -0400 Subject: [PATCH 10/12] Enable request reuse test --- src/test/request_cancellation_test.cc | 50 +++++++++++++-------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/test/request_cancellation_test.cc b/src/test/request_cancellation_test.cc index 84d865f90..7f5d92b5f 100644 --- a/src/test/request_cancellation_test.cc +++ b/src/test/request_cancellation_test.cc @@ -216,33 +216,31 @@ TEST_F(RequestCancellationTest, Cancellation) TRITONSERVER_InferenceResponse* response = future.get(); FAIL_TEST_IF_ERR(TRITONSERVER_InferenceResponseDelete(response)); - FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryDelete(response_factory)); - // TODO: Enable after https://github.com/triton-inference-server/core/pull/251 - // is merged. Currently, it fails with "Invalid request state transition from - // EXECUTING to STARTED". - // p = new std::promise(); - // future = p->get_future(); - - // FAIL_TEST_IF_ERR( - // TRITONSERVER_InferenceRequestSetResponseCallback( - // irequest_, allocator_, nullptr /* response_allocator_userp */, - // InferResponseComplete, reinterpret_cast(p))); - // // Sending another request and the request should not be cancelled. - // FAIL_TEST_IF_ERR(TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr - // /* trace */)); - - // is_cancelled = true; - // FAIL_TEST_IF_ERR( - // TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); - // ASSERT_FALSE(is_cancelled); - - // is_cancelled = false; - // FAIL_TEST_IF_ERR( - // TRITONBACKEND_ResponseFactoryIsCancelled(response_factory, - // &is_cancelled)); - // ASSERT_FALSE(is_cancelled); - // future.get(); + p = new std::promise(); + future = p->get_future(); + + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetResponseCallback( + irequest_, allocator_, nullptr /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p))); + // Sending another request and the request should not be cancelled. 
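+  // Reusing the request object must start from a clean cancellation
+  // state, so the checks that follow expect is_cancelled to be false.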
+ FAIL_TEST_IF_ERR(TRITONSERVER_ServerInferAsync( + server_, irequest_, nullptr + /* trace */)); + + is_cancelled = true; + FAIL_TEST_IF_ERR( + TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); + ASSERT_FALSE(is_cancelled); + + is_cancelled = false; + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryIsCancelled( + response_factory, &is_cancelled)); + ASSERT_FALSE(is_cancelled); + + response = future.get(); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceResponseDelete(response)); + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryDelete(response_factory)); } TEST_F(RequestCancellationTest, CancellationAfterRelease) From 775005193ff2de8bb8afb8314b10392a6baee3f0 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Wed, 6 Sep 2023 16:43:00 -0400 Subject: [PATCH 11/12] Add staged changes --- include/triton/core/tritonserver.h | 6 +++++- src/infer_request.cc | 1 + src/infer_request.h | 13 ++----------- src/infer_response.h | 1 + src/test/request_cancellation_test.cc | 7 ++----- src/tritonserver.cc | 2 +- 6 files changed, 12 insertions(+), 18 deletions(-) diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h index 0b4e15bba..a77c00980 100644 --- a/include/triton/core/tritonserver.h +++ b/include/triton/core/tritonserver.h @@ -1094,7 +1094,11 @@ TRITONSERVER_InferenceRequestSetCorrelationIdString( /// Cancel an inference request. Requests are canceled on a best /// effort basis and no guarantee is provided that cancelling a -/// request will result in early termination. +/// request will result in early termination. Note that the +/// inference request cancellation status will be reset after +/// TRITONSERVER_InferAsync is run. This means that if you cancel +/// the request before calling TRITONSERVER_InferAsync +/// the request will not be cancelled. /// /// \param inference_request The request object. /// \return a TRITONSERVER_Error indicating success or failure. diff --git a/src/infer_request.cc b/src/infer_request.cc index ef6abca78..d1a042346 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -819,6 +819,7 @@ InferenceRequest::PrepareForInference() // inference execution. inputs_.clear(); override_inputs_.clear(); + ResetCancel(); // Renormalize if anything has changed in the inference request in a // way that could impact renormalization. 
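Together with the API note above, this makes the caller-visible ordering
explicit: a cancel issued before the request is (re)submitted is wiped out
by PrepareForInference(). A short sketch of that ordering, assuming a live
server and a fully configured request as in the tests in this series
(returned TRITONSERVER_Error objects are ignored here for brevity):

#include "triton/core/tritonserver.h"

void
CancelOrderingSketch(
    TRITONSERVER_Server* server, TRITONSERVER_InferenceRequest* request)
{
  // Erased when the request next enters TRITONSERVER_ServerInferAsync(),
  // because PrepareForInference() now calls ResetCancel().
  TRITONSERVER_InferenceRequestCancel(request);

  TRITONSERVER_ServerInferAsync(server, request, nullptr /* trace */);

  // A cancel issued while the request is in flight sticks.
  TRITONSERVER_InferenceRequestCancel(request);

  bool is_cancelled = false;
  TRITONSERVER_InferenceRequestIsCancelled(request, &is_cancelled);
  // is_cancelled now reads true, until the request object is reused.
}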
diff --git a/src/infer_request.h b/src/infer_request.h index cbc10c977..56504abb9 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -679,17 +679,8 @@ class InferenceRequest { secondary_stats_aggregator_ = secondary_stats_aggregator; } - Status Cancel() - { - if (state_ != InferenceRequest::State::INITIALIZED) { - response_factory_->Cancel(); - } else { - return Status( - Status::Code::INTERNAL, - "Request cannot be cancelled before it has started executing."); - } - return Status::Success; - } + void Cancel() { response_factory_->Cancel(); } + void ResetCancel() { response_factory_->ResetCancel(); } bool IsCancelled() { return response_factory_->IsCancelled(); } diff --git a/src/infer_response.h b/src/infer_response.h index b451dc2d4..2beb9a667 100644 --- a/src/infer_response.h +++ b/src/infer_response.h @@ -65,6 +65,7 @@ class InferenceResponseFactory { } void Cancel() { is_cancelled_ = true; } + void ResetCancel() { is_cancelled_ = false; } bool IsCancelled() { return is_cancelled_; } diff --git a/src/test/request_cancellation_test.cc b/src/test/request_cancellation_test.cc index 7f5d92b5f..f4822cad4 100644 --- a/src/test/request_cancellation_test.cc +++ b/src/test/request_cancellation_test.cc @@ -194,11 +194,7 @@ TEST_F(RequestCancellationTest, Cancellation) TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); ASSERT_FALSE(is_cancelled); - TRITONSERVER_Error* error = TRITONSERVER_InferenceRequestCancel(irequest_); - ASSERT_TRUE(error != nullptr); - ASSERT_TRUE( - std::string(TRITONSERVER_ErrorMessage(error)) == - "Request cannot be cancelled before it has started executing."); + FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestCancel(irequest_)); FAIL_TEST_IF_ERR( TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr /* trace */)); @@ -223,6 +219,7 @@ TEST_F(RequestCancellationTest, Cancellation) FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetResponseCallback( irequest_, allocator_, nullptr /* response_allocator_userp */, InferResponseComplete, reinterpret_cast(p))); + // Sending another request and the request should not be cancelled. FAIL_TEST_IF_ERR(TRITONSERVER_ServerInferAsync( server_, irequest_, nullptr diff --git a/src/tritonserver.cc b/src/tritonserver.cc index 4a3013be9..998c7a90d 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -1653,7 +1653,7 @@ TRITONSERVER_InferenceRequestCancel( { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->Cancel()); + lrequest->Cancel(); return nullptr; // Success } From 56eae8aa3e0512eac4201268fda30c85215b9cf3 Mon Sep 17 00:00:00 2001 From: Iman Tabrizian Date: Wed, 6 Sep 2023 19:31:18 -0400 Subject: [PATCH 12/12] Add temporary fix for the request state bug --- src/test/request_cancellation_test.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/test/request_cancellation_test.cc b/src/test/request_cancellation_test.cc index f4822cad4..8e2652d30 100644 --- a/src/test/request_cancellation_test.cc +++ b/src/test/request_cancellation_test.cc @@ -24,6 +24,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#include #include #include @@ -106,6 +107,8 @@ class RequestCancellationTest : public ::testing::Test { server_options, "./models")); FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsSetBackendDirectory( server_options, "/opt/tritonserver/backends")); + FAIL_TEST_IF_ERR( + TRITONSERVER_ServerOptionsSetLogVerbose(server_options, 1)); FAIL_TEST_IF_ERR(TRITONSERVER_ServerOptionsSetRepoAgentDirectory( server_options, "/opt/tritonserver/repoagents")); FAIL_TEST_IF_ERR( @@ -212,6 +215,12 @@ TEST_F(RequestCancellationTest, Cancellation) TRITONSERVER_InferenceResponse* response = future.get(); FAIL_TEST_IF_ERR(TRITONSERVER_InferenceResponseDelete(response)); + FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryDelete(response_factory)); + + // FIXME: Looks like there is an issue with internal request state management. + // If the backend send responses before releasing the requests the state may + // not be set to "RELEASED" which is allowed for converting to "INITIALIZED". + std::this_thread::sleep_for(std::chrono::seconds(2)); p = new std::promise(); future = p->get_future(); @@ -219,6 +228,8 @@ TEST_F(RequestCancellationTest, Cancellation) FAIL_TEST_IF_ERR(TRITONSERVER_InferenceRequestSetResponseCallback( irequest_, allocator_, nullptr /* response_allocator_userp */, InferResponseComplete, reinterpret_cast(p))); + FAIL_TEST_IF_ERR( + TRITONBACKEND_ResponseFactoryNew(&response_factory, backend_request)); // Sending another request and the request should not be cancelled. FAIL_TEST_IF_ERR(TRITONSERVER_ServerInferAsync( @@ -230,7 +241,7 @@ TEST_F(RequestCancellationTest, Cancellation) TRITONSERVER_InferenceRequestIsCancelled(irequest_, &is_cancelled)); ASSERT_FALSE(is_cancelled); - is_cancelled = false; + is_cancelled = true; FAIL_TEST_IF_ERR(TRITONBACKEND_ResponseFactoryIsCancelled( response_factory, &is_cancelled)); ASSERT_FALSE(is_cancelled);
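For reference, the request lifecycle that patch 01 establishes, and whose
timing the FIXME above works around, can be written out as a standalone
predicate. This is a sketch of the transition table that
InferenceRequest::SetState() enforces, not the implementation itself
(SetState() additionally treats self-transitions and null requests as
no-ops):

enum class State { INITIALIZED, PENDING, EXECUTING, RELEASED };

bool
IsLegalTransition(State from, State to)
{
  switch (from) {
    case State::INITIALIZED:
      // Enqueued normally, or released early on error.
      return to == State::PENDING || to == State::RELEASED;
    case State::PENDING:
      // Scheduled onto a backend instance, or released early on error.
      return to == State::EXECUTING || to == State::RELEASED;
    case State::EXECUTING:
      return to == State::RELEASED;
    case State::RELEASED:
      // Re-initialized when the request object is reused for another
      // inference; the sleep in the test above exists because a response
      // can arrive before this edge is taken.
      return to == State::INITIALIZED;
  }
  return false;
}

The pending-request gauge piggybacks on this table: it is incremented only
on the INITIALIZED-to-PENDING edge and decremented whenever PENDING is
left, which is what allowed patch 01 to drop the per-request
decrement_pending_count_ flag and the destructor that consumed it.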