From cc9ec545223199b0d3795e23435185ade544cd3b Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:42:35 -0800 Subject: [PATCH 1/3] Add cancellation into response statistics --- src/backend_model_instance.cc | 4 ++++ src/infer_stats.cc | 27 +++++++++++++++++++++++++++ src/infer_stats.h | 9 ++++++++- src/tritonserver.cc | 3 +++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc index 5e43e093d..0cd1f43c3 100644 --- a/src/backend_model_instance.cc +++ b/src/backend_model_instance.cc @@ -1103,6 +1103,10 @@ TRITONBACKEND_ModelInstanceReportResponseStatistics( RETURN_TRITONSERVER_ERROR_IF_ERROR( sa->UpdateResponseEmpty(key, rs->response_start, rs->response_end)); } + } else if ( + TRITONSERVER_ErrorCode(rs->error) == TRITONSERVER_ERROR_CANCELLED) { + RETURN_TRITONSERVER_ERROR_IF_ERROR( + sa->UpdateResponseCancel(key, rs->response_start, rs->response_end)); } else { RETURN_TRITONSERVER_ERROR_IF_ERROR(sa->UpdateResponseFail( key, rs->response_start, rs->compute_output_start, rs->response_end)); diff --git a/src/infer_stats.cc b/src/infer_stats.cc index 0f47485c2..68cf70a0c 100644 --- a/src/infer_stats.cc +++ b/src/infer_stats.cc @@ -336,6 +336,33 @@ InferenceStatsAggregator::UpdateResponseEmpty( return Status::Success; } +Status +InferenceStatsAggregator::UpdateResponseCancel( + const std::string& key, const uint64_t response_start_ns, + const uint64_t response_end_ns) +{ + if (response_start_ns > response_end_ns) { + return Status( + Status::Code::INVALID_ARG, + "Response start cannot happen after response end"); + } + const uint64_t total_duration_ns = response_end_ns - response_start_ns; + + { + std::lock_guard lock(mu_); + + auto it = response_stats_.find(key); + if (it == response_stats_.end()) { + it = response_stats_.emplace(key, InferResponseStats()).first; + } + + it->second.cancel_count++; + it->second.cancel_duration_ns += total_duration_ns; + } + + return Status::Success; +} + void InferenceStatsAggregator::UpdateInferBatchStats( MetricModelReporter* metric_reporter, const size_t batch_size, diff --git a/src/infer_stats.h b/src/infer_stats.h index 2ae2bc226..66b3659bd 100644 --- a/src/infer_stats.h +++ b/src/infer_stats.h @@ -84,7 +84,7 @@ class InferenceStatsAggregator { compute_output_count(0), compute_output_duration_ns(0), success_count(0), success_duration_ns(0), fail_count(0), fail_duration_ns(0), empty_response_count(0), - empty_response_duration_ns(0) + empty_response_duration_ns(0), cancel_count(0), cancel_duration_ns(0) { } uint64_t compute_infer_count; @@ -97,6 +97,8 @@ class InferenceStatsAggregator { uint64_t fail_duration_ns; uint64_t empty_response_count; uint64_t empty_response_duration_ns; + uint64_t cancel_count; + uint64_t cancel_duration_ns; }; struct InferBatchStats { @@ -181,6 +183,11 @@ class InferenceStatsAggregator { const std::string& key, const uint64_t response_start_ns, const uint64_t response_end_ns); + // Add durations to response stats for a cancellation response. + Status UpdateResponseCancel( + const std::string& key, const uint64_t response_start_ns, + const uint64_t response_end_ns); + // Add durations to batch infer stats for a batch execution. // 'success_request_count' is the number of success requests in the // batch that have infer_stats attached. diff --git a/src/tritonserver.cc b/src/tritonserver.cc index f2d3c2624..76d65743e 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -2959,6 +2959,9 @@ TRITONSERVER_ServerModelStatistics( metadata, res_stat, "empty_response", res_pair.second.empty_response_count, res_pair.second.empty_response_duration_ns); + SetDurationStat( + metadata, res_stat, "cancel", res_pair.second.cancel_count, + res_pair.second.cancel_duration_ns); RETURN_IF_STATUS_ERROR( response_stats.Add(res_pair.first.c_str(), std::move(res_stat))); } From 86ea0ddaf1696253b130f23a86bb5d8acc702b1a Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 22 Feb 2024 14:47:01 -0800 Subject: [PATCH 2/3] Update backend API version --- include/triton/core/tritonbackend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index ad04d57f8..17a9442d9 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -95,7 +95,7 @@ struct TRITONBACKEND_Batcher; /// } /// #define TRITONBACKEND_API_VERSION_MAJOR 1 -#define TRITONBACKEND_API_VERSION_MINOR 19 +#define TRITONBACKEND_API_VERSION_MINOR 20 /// Get the TRITONBACKEND API version supported by Triton. This value /// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and From e535cf7a77ada777c4cecf226e889fe350368f98 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 22 Feb 2024 14:55:28 -0800 Subject: [PATCH 3/3] Revert "Update backend API version" This reverts commit 86ea0ddaf1696253b130f23a86bb5d8acc702b1a. --- include/triton/core/tritonbackend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 17a9442d9..ad04d57f8 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -95,7 +95,7 @@ struct TRITONBACKEND_Batcher; /// } /// #define TRITONBACKEND_API_VERSION_MAJOR 1 -#define TRITONBACKEND_API_VERSION_MINOR 20 +#define TRITONBACKEND_API_VERSION_MINOR 19 /// Get the TRITONBACKEND API version supported by Triton. This value /// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and