Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cancellation into response statistics #329

Merged
merged 3 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/triton/core/tritonbackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct TRITONBACKEND_Batcher;
/// }
///
#define TRITONBACKEND_API_VERSION_MAJOR 1
#define TRITONBACKEND_API_VERSION_MINOR 19
#define TRITONBACKEND_API_VERSION_MINOR 20
kthui marked this conversation as resolved.
Show resolved Hide resolved

/// Get the TRITONBACKEND API version supported by Triton. This value
/// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and
Expand Down
4 changes: 4 additions & 0 deletions src/backend_model_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,10 @@ TRITONBACKEND_ModelInstanceReportResponseStatistics(
RETURN_TRITONSERVER_ERROR_IF_ERROR(
sa->UpdateResponseEmpty(key, rs->response_start, rs->response_end));
}
} else if (
TRITONSERVER_ErrorCode(rs->error) == TRITONSERVER_ERROR_CANCELLED) {
RETURN_TRITONSERVER_ERROR_IF_ERROR(
sa->UpdateResponseCancel(key, rs->response_start, rs->response_end));
} else {
RETURN_TRITONSERVER_ERROR_IF_ERROR(sa->UpdateResponseFail(
key, rs->response_start, rs->compute_output_start, rs->response_end));
Expand Down
27 changes: 27 additions & 0 deletions src/infer_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,33 @@ InferenceStatsAggregator::UpdateResponseEmpty(
return Status::Success;
}

// Record one cancelled response in the response statistics kept under
// 'key'. The elapsed time ('response_end_ns' - 'response_start_ns') is
// added to the entry's cancel duration and its cancel count is bumped
// by one. An entry is created for 'key' if none exists yet.
//
// Returns INVALID_ARG when 'response_start_ns' is later than
// 'response_end_ns'; otherwise returns Status::Success.
Status
InferenceStatsAggregator::UpdateResponseCancel(
    const std::string& key, const uint64_t response_start_ns,
    const uint64_t response_end_ns)
{
  // Reject an inverted interval before touching any shared state, so the
  // unsigned subtraction below can never wrap around.
  if (response_end_ns < response_start_ns) {
    return Status(
        Status::Code::INVALID_ARG,
        "Response start cannot happen after response end");
  }

  const uint64_t elapsed_ns = response_end_ns - response_start_ns;

  {
    // 'mu_' is held while the per-key statistics are read and updated.
    std::lock_guard<std::mutex> guard(mu_);

    // Locate the entry for this key, creating a zero-initialized one on
    // first use.
    auto entry = response_stats_.find(key);
    if (entry == response_stats_.end()) {
      entry = response_stats_.emplace(key, InferResponseStats()).first;
    }

    auto& stats = entry->second;
    stats.cancel_count++;
    stats.cancel_duration_ns += elapsed_ns;
  }

  return Status::Success;
}

void
InferenceStatsAggregator::UpdateInferBatchStats(
MetricModelReporter* metric_reporter, const size_t batch_size,
Expand Down
9 changes: 8 additions & 1 deletion src/infer_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class InferenceStatsAggregator {
compute_output_count(0), compute_output_duration_ns(0),
success_count(0), success_duration_ns(0), fail_count(0),
fail_duration_ns(0), empty_response_count(0),
empty_response_duration_ns(0)
empty_response_duration_ns(0), cancel_count(0), cancel_duration_ns(0)
{
}
uint64_t compute_infer_count;
Expand All @@ -97,6 +97,8 @@ class InferenceStatsAggregator {
uint64_t fail_duration_ns;
uint64_t empty_response_count;
uint64_t empty_response_duration_ns;
uint64_t cancel_count;
uint64_t cancel_duration_ns;
};

struct InferBatchStats {
Expand Down Expand Up @@ -181,6 +183,11 @@ class InferenceStatsAggregator {
const std::string& key, const uint64_t response_start_ns,
const uint64_t response_end_ns);

// Add durations to response stats for a cancellation response.
// Increments the cancel count recorded under 'key' and adds
// ('response_end_ns' - 'response_start_ns') to its cancel duration,
// creating the entry for 'key' if it does not exist yet.
// Returns an INVALID_ARG status if 'response_start_ns' is after
// 'response_end_ns'.
Status UpdateResponseCancel(
const std::string& key, const uint64_t response_start_ns,
const uint64_t response_end_ns);

// Add durations to batch infer stats for a batch execution.
// 'success_request_count' is the number of success requests in the
// batch that have infer_stats attached.
Expand Down
3 changes: 3 additions & 0 deletions src/tritonserver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2959,6 +2959,9 @@ TRITONSERVER_ServerModelStatistics(
metadata, res_stat, "empty_response",
res_pair.second.empty_response_count,
res_pair.second.empty_response_duration_ns);
SetDurationStat(
metadata, res_stat, "cancel", res_pair.second.cancel_count,
res_pair.second.cancel_duration_ns);
RETURN_IF_STATUS_ERROR(
response_stats.Add(res_pair.first.c_str(), std::move(res_stat)));
}
Expand Down
Loading