Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[STF] Remove unmaintained CUDASTF_DEBUG option #3944

Merged
merged 7 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@
"cudax_ENABLE_CUDASTF": true,
"cudax_ENABLE_CUDASTF_BOUNDSCHECK": false,
"cudax_ENABLE_CUDASTF_CODE_GENERATION": true,
"cudax_ENABLE_CUDASTF_DEBUG": false,
"cudax_ENABLE_CUDASTF_MATHLIBS": false,
"cudax_ENABLE_DIALECT_CPP17": true,
"cudax_ENABLE_DIALECT_CPP20": true,
Expand All @@ -100,8 +99,7 @@
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_CUDA_FLAGS": "-G",
"CCCL_ENABLE_BENCHMARKS": false,
"cudax_ENABLE_CUDASTF_BOUNDSCHECK": true,
"cudax_ENABLE_CUDASTF_DEBUG": true
"cudax_ENABLE_CUDASTF_BOUNDSCHECK": true
}
},
{
Expand Down
5 changes: 2 additions & 3 deletions cudax/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,11 @@ option(cudax_ENABLE_EXAMPLES "Build CUDA Experimental's examples." ON)
option(cudax_ENABLE_CUDASTF "Enable CUDASTF subproject" ON)
option(cudax_ENABLE_CUDASTF_CODE_GENERATION "Enable code generation using STF's parallel_for or launch with CUDA compiler." ON)
option(cudax_ENABLE_CUDASTF_BOUNDSCHECK "Enable bounds checks for STF targets. Requires debug build." OFF)
option(cudax_ENABLE_CUDASTF_DEBUG "Enable additional debugging for STF targets. Requires debug build." OFF)
option(cudax_ENABLE_CUDASTF_MATHLIBS "Enable STF tests/examples that use cublas/cusolver." OFF)

if ((cudax_ENABLE_CUDASTF_BOUNDSCHECK OR cudax_ENABLE_CUDASTF_DEBUG) AND
if (cudax_ENABLE_CUDASTF_BOUNDSCHECK AND
NOT CMAKE_BUILD_TYPE MATCHES "Debug" AND NOT CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo")
message(FATAL_ERROR "cudax_ENABLE_CUDASTF_BOUNDSCHECK and cudax_ENABLE_CUDASTF_DEBUG require a Debug build.")
message(FATAL_ERROR "cudax_ENABLE_CUDASTF_BOUNDSCHECK requires a Debug build.")
endif()

include(cmake/cudaxBuildCompilerTargets.cmake)
Expand Down
6 changes: 0 additions & 6 deletions cudax/cmake/cudaxSTFConfigureTarget.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,4 @@ function(cudax_stf_configure_target target_name)
"CUDASTF_BOUNDSCHECK"
)
endif()

if (cudax_ENABLE_CUDASTF_DEBUG)
target_compile_definitions(${target_name} PRIVATE
"CUDASTF_DEBUG"
)
endif()
endfunction()
52 changes: 0 additions & 52 deletions cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,6 @@
namespace cuda::experimental::stf
{

namespace reserved
{

// For counters
class graph_tag
{
public:
class launch
{};
class instantiate
{};
class update
{
public:
class success
{};
class failure
{};
};
};

} // end namespace reserved

/**
* @brief Uncached allocator (used as a base for other allocators)
*
Expand Down Expand Up @@ -348,23 +325,6 @@ public:
state.submitted_stream = nullptr;
state.cleanup();
set_phase(backend_ctx_untyped::phase::finalized);

#ifdef CUDASTF_DEBUG
const char* display_stats_env = getenv("CUDASTF_DISPLAY_STATS");
if (!display_stats_env || atoi(display_stats_env) == 0)
{
return;
}

fprintf(
stderr, "[STATS CUDA GRAPHS] instantiated=%lu\n", reserved::counter<reserved::graph_tag::instantiate>.load());
fprintf(stderr, "[STATS CUDA GRAPHS] launched=%lu\n", reserved::counter<reserved::graph_tag::launch>.load());
fprintf(stderr,
"[STATS CUDA GRAPHS] updated=%lu success=%ld failed=%ld\n",
reserved::counter<reserved::graph_tag::update>.load(),
reserved::counter<reserved::graph_tag::update::success>.load(),
reserved::counter<reserved::graph_tag::update::failure>.load());
#endif
}

void submit(cudaStream_t stream = nullptr)
Expand Down Expand Up @@ -394,10 +354,6 @@ public:

cuda_try(cudaGraphLaunch(*state.exec_graph, state.submitted_stream));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::launch>.increment();
#endif

// Note that we comment this out for now, so that it is possible to use
// the print_to_dot method; but we may perhaps discard this graph to
// some dedicated member variable.
Expand Down Expand Up @@ -614,10 +570,6 @@ private:

cuda_try(cudaGraphInstantiateWithFlags(res.get(), g, 0));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::instantiate>.increment();
#endif

return res;
}

Expand Down Expand Up @@ -712,10 +664,6 @@ private:

cuda_try(cudaGraphLaunch(local_exec_graph, state.submitted_stream));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::launch>.increment();
#endif

return state.submitted_stream;
}

Expand Down
10 changes: 0 additions & 10 deletions cudax/include/cuda/experimental/__stf/internal/async_prereq.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -433,15 +433,6 @@ inline event_list event_impl::from_stream(backend_ctx_untyped&, cudaStream_t) co
}
_CCCL_DIAG_POP

namespace reserved
{

// For counters
class join_tag
{};

} // end namespace reserved

/**
* @brief Introduce a dependency from all entries of an event list to an event.

Expand Down Expand Up @@ -474,7 +465,6 @@ void join(context_t& ctx, some_event& to, event_list& prereq_in)
{
from = static_cast<some_event*>(item.operator->());
}
reserved::counter<reserved::join_tag>::increment();
to.insert_dep(ctx.async_resources(), *from);
from->outbound_deps++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,8 @@ protected:
reserved::dot::instance().is_timing());

// We generate symbols if we may use them
#ifdef CUDASTF_DEBUG
generate_event_symbols = true;
#else
generate_event_symbols = dot->is_tracing_prereqs();
#endif

// Record it in the list of all traced contexts
reserved::dot::instance().per_ctx.push_back(dot);
}
Expand All @@ -575,8 +572,6 @@ protected:
}

display_transfers();

fprintf(stderr, "TOTAL SYNC COUNT: %lu\n", reserved::counter<reserved::join_tag>::load());
}

impl(const impl&) = delete;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,6 @@ inline bool try_updating_executable_graph(cudaGraphExec_t exec_graph, cudaGraph_
// Be sure to "erase" the last error
cudaError_t res = cudaGetLastError();

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::update>.increment();
if (res == cudaSuccess)
{
reserved::counter<reserved::graph_tag::update::success>.increment();
}
else
{
reserved::counter<reserved::graph_tag::update::failure>.increment();
}
#endif

return (res == cudaSuccess);
}

Expand All @@ -81,10 +69,6 @@ inline ::std::shared_ptr<cudaGraphExec_t> graph_instantiate(cudaGraph_t g)

cuda_try(cudaGraphInstantiateWithFlags(res.get(), g, 0));

#ifdef CUDASTF_DEBUG
reserved::counter<reserved::graph_tag::instantiate>.increment();
#endif

return res;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,6 @@ inline event join_with_stream(

using stream_and_event_vector = small_vector<reserved::handle<stream_and_event>, 7>;

/* Tag types for event counters */
class cuda_event_tag
{
public:
class created
{};
class alive
{};
class destroyed
{};
};

class cuda_stream_wait_event_tag
{};

} // namespace reserved

/* This event type allows to synchronize a CUDA stream with a CUDA event in
Expand All @@ -77,10 +62,6 @@ protected:
if (cudaEvent)
{
cuda_safe_call(cudaEventDestroy(cudaEvent));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::destroyed>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::decrement();
#endif

// fprintf(stderr, "DESTROY EVENT %p #%d (created %d)\n", event, ++destroyed_event_cnt,
// event_cnt);
Expand Down Expand Up @@ -129,27 +110,13 @@ public:
// Disable timing to avoid implicit barriers
cudaEvent_t sync_event;
cuda_safe_call(cudaEventCreateWithFlags(&sync_event, cudaEventDisableTiming));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::created>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::increment();
reserved::high_water_mark<reserved::cuda_event_tag::alive>::record(
reserved::counter<cuda_event_tag::alive>::load());
#endif

cuda_safe_call(cudaEventRecord(sync_event, s2));

// According to documentation "event may be from a different device than stream."
cuda_safe_call(cudaStreamWaitEvent(s1, sync_event, 0));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_stream_wait_event_tag>.increment();
#endif

// Asynchronously destroy event to avoid a memleak
cuda_safe_call(cudaEventDestroy(sync_event));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::destroyed>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::decrement();
#endif
};
}

Expand All @@ -167,12 +134,6 @@ public:
cuda_safe_call(cudaEventCreateWithFlags(&cudaEvent, cudaEventDisableTiming));
// fprintf(stderr, "CREATE EVENT %p %s\n", cudaEvent, get_symbol().c_str());
assert(cudaEvent);
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::created>::increment();
reserved::counter<reserved::cuda_event_tag::alive>::increment();
reserved::high_water_mark<reserved::cuda_event_tag::alive>::record(
reserved::counter<cuda_event_tag::alive>::load());
#endif
cuda_safe_call(cudaEventRecord(cudaEvent, dstream.stream));
};
}
Expand All @@ -186,9 +147,6 @@ public:
if (!skip)
{
cuda_safe_call(cudaStreamWaitEvent(dstream.stream, from.cudaEvent, 0));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_stream_wait_event_tag>.increment();
#endif
}
}
}
Expand Down Expand Up @@ -415,9 +373,6 @@ private:
if (!skip)
{
cuda_safe_call(cudaStreamWaitEvent(dstream.stream, se->get_cuda_event(), 0));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_stream_wait_event_tag>.increment();
#endif
}
}
se->outbound_deps++;
Expand Down
33 changes: 1 addition & 32 deletions cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <cuda/experimental/__stf/stream/interfaces/slice.cuh> // For implicit logical_data_untyped constructors
#include <cuda/experimental/__stf/stream/interfaces/void_interface.cuh>
#include <cuda/experimental/__stf/stream/stream_task.cuh>
#include <cuda/experimental/__stf/utility/threads.cuh> // for reserved::counter

namespace cuda::experimental::stf
{
Expand Down Expand Up @@ -516,38 +517,6 @@ public:
}
state.cleanup();
set_phase(backend_ctx_untyped::phase::finalized);

#ifdef CUDASTF_DEBUG
// Ensure that the total number of CUDA events created corresponds to
// the number of events destroyed
const auto alive = reserved::counter<reserved::cuda_event_tag::alive>.load();
if (alive != 0)
{
fprintf(stderr,
"WARNING!!! %lu CUDA events leaked (approx %lu created vs. %lu destroyed).\n",
alive,
reserved::counter<reserved::cuda_event_tag::created>.load(),
reserved::counter<reserved::cuda_event_tag::destroyed>.load());
}

assert(alive == 0);

const char* display_stats_env = getenv("CUDASTF_DISPLAY_STATS");
if (!display_stats_env || atoi(display_stats_env) == 0)
{
return;
}

fprintf(stderr,
"[STATS CUDA EVENTS] created=%lu destroyed=%lu alive=%lu reserved::high_water_mark=%lu\n",
reserved::counter<reserved::cuda_event_tag::created>.load(),
reserved::counter<reserved::cuda_event_tag::destroyed>.load(),
alive,
reserved::high_water_mark<reserved::cuda_event_tag>.load());
fprintf(stderr,
"[STATS CUDA EVENTS] cuda_stream_wait_event=%lu\n",
reserved::counter<reserved::cuda_stream_wait_event_tag>.load());
#endif
}

float get_submission_time_ms() const
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,10 +415,6 @@ private:
cudaEvent_t sync_event;
// Disable timing to avoid implicit barriers
cuda_safe_call(cudaEventCreateWithFlags(&sync_event, cudaEventDisableTiming));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::created> ++;
reserved::high_water_mark<reserved::cuda_event_tag>.record(++reserved::counter<cuda_event_tag::alive>);
#endif

cuda_safe_call(cudaEventRecord(sync_event, streams[0].stream));

Expand All @@ -430,10 +426,6 @@ private:

// Asynchronously destroy event to avoid a memleak
cuda_safe_call(cudaEventDestroy(sync_event));
#ifdef CUDASTF_DEBUG
reserved::counter<reserved::cuda_event_tag::destroyed>.increment();
reserved::counter<reserved::cuda_event_tag::alive>.decrement();
#endif

if (current_dev != s0_dev)
{
Expand Down
Loading