diff --git a/src/infer_response.cc b/src/infer_response.cc index 8e3e2712..09737b26 100644 --- a/src/infer_response.cc +++ b/src/infer_response.cc @@ -249,9 +249,11 @@ InferResponse::Send( } bool cuda_copy = false; +#ifdef TRITON_ENABLE_GPU // This variable is used to avoid printing the same message multiple times // when the output tensor is failed to be allocated from the CUDA memory pool. bool log_warning = true; +#endif // TRITON_ENABLE_GPU for (auto& output_tensor : OutputTensors()) { // FIXME: for decoupled models we will skip the requested output names.