Skip to content

Commit

Permalink
[STF] Remove unmaintained CUDASTF_DEBUG option (#3944)
Browse files Browse the repository at this point in the history
* Remove outdated debug code that is no longer needed because we have better instrumentation available

* Misc. compilation fixes for CUDASTF_DEBUG

* Entirely remove CUDASTF_DEBUG

* remove more outdated debug code
  • Loading branch information
caugonnet authored Feb 26, 2025
1 parent 2895b96 commit 64bba7b
Show file tree
Hide file tree
Showing 10 changed files with 5 additions and 181 deletions.
4 changes: 1 addition & 3 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@
"cudax_ENABLE_CUDASTF": true,
"cudax_ENABLE_CUDASTF_BOUNDSCHECK": false,
"cudax_ENABLE_CUDASTF_CODE_GENERATION": true,
"cudax_ENABLE_CUDASTF_DEBUG": false,
"cudax_ENABLE_CUDASTF_MATHLIBS": false,
"cudax_ENABLE_DIALECT_CPP17": true,
"cudax_ENABLE_DIALECT_CPP20": true,
Expand All @@ -100,8 +99,7 @@
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_CUDA_FLAGS": "-G",
"CCCL_ENABLE_BENCHMARKS": false,
"cudax_ENABLE_CUDASTF_BOUNDSCHECK": true,
"cudax_ENABLE_CUDASTF_DEBUG": true
"cudax_ENABLE_CUDASTF_BOUNDSCHECK": true
}
},
{
Expand Down
5 changes: 2 additions & 3 deletions cudax/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,11 @@ option(cudax_ENABLE_EXAMPLES "Build CUDA Experimental's examples." ON)
option(cudax_ENABLE_CUDASTF "Enable CUDASTF subproject" ON)
option(cudax_ENABLE_CUDASTF_CODE_GENERATION "Enable code generation using STF's parallel_for or launch with CUDA compiler." ON)
option(cudax_ENABLE_CUDASTF_BOUNDSCHECK "Enable bounds checks for STF targets. Requires debug build." OFF)
option(cudax_ENABLE_CUDASTF_DEBUG "Enable additional debugging for STF targets. Requires debug build." OFF)
option(cudax_ENABLE_CUDASTF_MATHLIBS "Enable STF tests/examples that use cublas/cusolver." OFF)

if ((cudax_ENABLE_CUDASTF_BOUNDSCHECK OR cudax_ENABLE_CUDASTF_DEBUG) AND
if (cudax_ENABLE_CUDASTF_BOUNDSCHECK AND
NOT CMAKE_BUILD_TYPE MATCHES "Debug" AND NOT CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo")
message(FATAL_ERROR "cudax_ENABLE_CUDASTF_BOUNDSCHECK and cudax_ENABLE_CUDASTF_DEBUG require a Debug build.")
message(FATAL_ERROR "cudax_ENABLE_CUDASTF_BOUNDSCHECK requires a Debug build.")
endif()

include(cmake/cudaxBuildCompilerTargets.cmake)
Expand Down
6 changes: 0 additions & 6 deletions cudax/cmake/cudaxSTFConfigureTarget.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,4 @@ function(cudax_stf_configure_target target_name)
"CUDASTF_BOUNDSCHECK"
)
endif()

if (cudax_ENABLE_CUDASTF_DEBUG)
target_compile_definitions(${target_name} PRIVATE
"CUDASTF_DEBUG"
)
endif()
endfunction()
52 changes: 0 additions & 52 deletions cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,6 @@
namespace cuda::experimental::stf
{

namespace reserved
{

// For counters
class graph_tag
{
public:
class launch
{};
class instantiate
{};
class update
{
public:
class success
{};
class failure
{};
};
};

} // end namespace reserved

/**
* @brief Uncached allocator (used as a base for other allocators)
*
Expand Down Expand Up @@ -348,23 +325,6 @@ public:
state.submitted_stream = nullptr;
state.cleanup();
set_phase(backend_ctx_untyped::phase::finalized);

#ifdef CUDASTF_DEBUG
const char* display_stats_env = getenv("CUDASTF_DISPLAY_STATS");
if (!display_stats_env || atoi(display_stats_env) == 0)
{
return;
}

fprintf(
stderr, "[STATS CUDA GRAPHS] instantiated=%lu\n", reserved::counter<reserved::graph_tag::instantiate>.load());
fprintf(stderr, "[STATS CUDA GRAPHS] launched=%lu\n", reserved::counter<reserved::graph_tag::launch>.load());
fprintf(stderr,
"[STATS CUDA GRAPHS] updated=%lu success=%ld failed=%ld\n",
reserved::counter<reserved::graph_tag::update>.load(),
reserved::counter<reserved::graph_tag::update::success>.load(),
reserved::counter<reserved::graph_tag::update::failure>.load());
#endif
}

void submit(cudaStream_t stream = nullptr)
Expand Down Expand Up @@ -394,10 +354,6 @@ public:

cuda_try(cudaGraphLaunch(*state.exec_graph, state.submitted_stream));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::launch>.increment();
#endif

// Note that we comment this out for now, so that it is possible to use
// the print_to_dot method; but we may perhaps discard this graph to
// some dedicated member variable.
Expand Down Expand Up @@ -614,10 +570,6 @@ private:

cuda_try(cudaGraphInstantiateWithFlags(res.get(), g, 0));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::instantiate>.increment();
#endif

return res;
}

Expand Down Expand Up @@ -712,10 +664,6 @@ private:

cuda_try(cudaGraphLaunch(local_exec_graph, state.submitted_stream));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::launch>.increment();
#endif

return state.submitted_stream;
}

Expand Down
10 changes: 0 additions & 10 deletions cudax/include/cuda/experimental/__stf/internal/async_prereq.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -433,15 +433,6 @@ inline event_list event_impl::from_stream(backend_ctx_untyped&, cudaStream_t) co
}
_CCCL_DIAG_POP

namespace reserved
{

// For counters
class join_tag
{};

} // end namespace reserved

/**
* @brief Introduce a dependency from all entries of an event list to an event.
Expand Down Expand Up @@ -474,7 +465,6 @@ void join(context_t& ctx, some_event& to, event_list& prereq_in)
{
from = static_cast<some_event*>(item.operator->());
}
reserved::counter<reserved::join_tag>::increment();
to.insert_dep(ctx.async_resources(), *from);
from->outbound_deps++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,8 @@ protected:
reserved::dot::instance().is_timing());

// We generate symbols if we may use them
#ifdef CUDASTF_DEBUG
generate_event_symbols = true;
#else
generate_event_symbols = dot->is_tracing_prereqs();
#endif

// Record it in the list of all traced contexts
reserved::dot::instance().per_ctx.push_back(dot);
}
Expand All @@ -575,8 +572,6 @@ protected:
}

display_transfers();

fprintf(stderr, "TOTAL SYNC COUNT: %lu\n", reserved::counter<reserved::join_tag>::load());
}

impl(const impl&) = delete;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,6 @@ inline bool try_updating_executable_graph(cudaGraphExec_t exec_graph, cudaGraph_
// Be sure to "erase" the last error
cudaError_t res = cudaGetLastError();

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::update>.increment();
if (res == cudaSuccess)
{
reserved::counter<reserved::graph_tag::update::success>.increment();
}
else
{
reserved::counter<reserved::graph_tag::update::failure>.increment();
}
#endif

return (res == cudaSuccess);
}

Expand All @@ -81,10 +69,6 @@ inline ::std::shared_ptr<cudaGraphExec_t> graph_instantiate(cudaGraph_t g)

cuda_try(cudaGraphInstantiateWithFlags(res.get(), g, 0));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::instantiate>.increment();
#endif

return res;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,6 @@ inline event join_with_stream(

using stream_and_event_vector = small_vector<reserved::handle<stream_and_event>, 7>;

/* Tag types for event counters */
class cuda_event_tag
{
public:
class created
{};
class alive
{};
class destroyed
{};
};

class cuda_stream_wait_event_tag
{};

} // namespace reserved

/* This event type allows to synchronize a CUDA stream with a CUDA event in
Expand All @@ -77,10 +62,6 @@ protected:
if (cudaEvent)
{
cuda_safe_call(cudaEventDestroy(cudaEvent));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::destroyed>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::decrement();
#endif

// fprintf(stderr, "DESTROY EVENT %p #%d (created %d)\n", event, ++destroyed_event_cnt,
// event_cnt);
Expand Down Expand Up @@ -129,27 +110,13 @@ public:
// Disable timing to avoid implicit barriers
cudaEvent_t sync_event;
cuda_safe_call(cudaEventCreateWithFlags(&sync_event, cudaEventDisableTiming));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::created>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::increment();
reserved::high_water_mark<reserved::cuda_event_tag::alive>::record(
reserved::counter<cuda_event_tag::alive>::load());
#endif

cuda_safe_call(cudaEventRecord(sync_event, s2));

// According to documentation "event may be from a different device than stream."
cuda_safe_call(cudaStreamWaitEvent(s1, sync_event, 0));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_stream_wait_event_tag>.increment();
#endif

// Asynchronously destroy event to avoid a memleak
cuda_safe_call(cudaEventDestroy(sync_event));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::destroyed>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::decrement();
#endif
};
}

Expand All @@ -167,12 +134,6 @@ public:
cuda_safe_call(cudaEventCreateWithFlags(&cudaEvent, cudaEventDisableTiming));
// fprintf(stderr, "CREATE EVENT %p %s\n", cudaEvent, get_symbol().c_str());
assert(cudaEvent);
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::created>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::increment();
reserved::high_water_mark<reserved::cuda_event_tag::alive>::record(
reserved::counter<cuda_event_tag::alive>::load());
#endif
cuda_safe_call(cudaEventRecord(cudaEvent, dstream.stream));
};
}
Expand All @@ -186,9 +147,6 @@ public:
if (!skip)
{
cuda_safe_call(cudaStreamWaitEvent(dstream.stream, from.cudaEvent, 0));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_stream_wait_event_tag>.increment();
#endif
}
}
}
Expand Down Expand Up @@ -415,9 +373,6 @@ private:
if (!skip)
{
cuda_safe_call(cudaStreamWaitEvent(dstream.stream, se->get_cuda_event(), 0));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_stream_wait_event_tag>.increment();
#endif
}
}
se->outbound_deps++;
Expand Down
33 changes: 1 addition & 32 deletions cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <cuda/experimental/__stf/stream/interfaces/slice.cuh> // For implicit logical_data_untyped constructors
#include <cuda/experimental/__stf/stream/interfaces/void_interface.cuh>
#include <cuda/experimental/__stf/stream/stream_task.cuh>
#include <cuda/experimental/__stf/utility/threads.cuh> // for reserved::counter

namespace cuda::experimental::stf
{
Expand Down Expand Up @@ -516,38 +517,6 @@ public:
}
state.cleanup();
set_phase(backend_ctx_untyped::phase::finalized);

#ifdef CUDASTF_DEBUG
// Ensure that the total number of CUDA events created corresponds to
// the number of events destroyed
const auto alive = reserved::counter<reserved::cuda_event_tag::alive>.load();
if (alive != 0)
{
fprintf(stderr,
"WARNING!!! %lu CUDA events leaked (approx %lu created vs. %lu destroyed).\n",
alive,
reserved::counter<reserved::cuda_event_tag::created>.load(),
reserved::counter<reserved::cuda_event_tag::destroyed>.load());
}

assert(alive == 0);

const char* display_stats_env = getenv("CUDASTF_DISPLAY_STATS");
if (!display_stats_env || atoi(display_stats_env) == 0)
{
return;
}

fprintf(stderr,
"[STATS CUDA EVENTS] created=%lu destroyed=%lu alive=%lu reserved::high_water_mark=%lu\n",
reserved::counter<reserved::cuda_event_tag::created>.load(),
reserved::counter<reserved::cuda_event_tag::destroyed>.load(),
alive,
reserved::high_water_mark<reserved::cuda_event_tag>.load());
fprintf(stderr,
"[STATS CUDA EVENTS] cuda_stream_wait_event=%lu\n",
reserved::counter<reserved::cuda_stream_wait_event_tag>.load());
#endif
}

float get_submission_time_ms() const
Expand Down
8 changes: 0 additions & 8 deletions cudax/include/cuda/experimental/__stf/stream/stream_task.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -415,10 +415,6 @@ private:
cudaEvent_t sync_event;
// Disable timing to avoid implicit barriers
cuda_safe_call(cudaEventCreateWithFlags(&sync_event, cudaEventDisableTiming));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::created> ++;
reserved::high_water_mark<reserved::cuda_event_tag>.record(++reserved::counter<cuda_event_tag::alive>);
#endif

cuda_safe_call(cudaEventRecord(sync_event, streams[0].stream));

Expand All @@ -430,10 +426,6 @@ private:

// Asynchronously destroy event to avoid a memleak
cuda_safe_call(cudaEventDestroy(sync_event));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::destroyed>.increment();
reserved::counter<reserved::cuda_event_tag::alive>.decrement();
#endif

if (current_dev != s0_dev)
{
Expand Down

0 comments on commit 64bba7b

Please sign in to comment.