diff --git a/graphbolt/src/cuda/common.h b/graphbolt/src/cuda/common.h
index c2ffc719c438..b18258a1dbb6 100644
--- a/graphbolt/src/cuda/common.h
+++ b/graphbolt/src/cuda/common.h
@@ -58,7 +58,9 @@ struct CUDAWorkspaceAllocator {
   // Required by thrust to satisfy allocator requirements.
   using value_type = value_t;
 
-  explicit CUDAWorkspaceAllocator() { at::globalContext().lazyInitCUDA(); }
+  explicit CUDAWorkspaceAllocator() {
+    at::globalContext().lazyInitDevice(at::kCUDA);
+  }
 
   template <class U>
   CUDAWorkspaceAllocator(CUDAWorkspaceAllocator<U> const&) noexcept {}
diff --git a/tensoradapter/pytorch/torch.cpp b/tensoradapter/pytorch/torch.cpp
index e02b02e46dda..3e410f01c7ba 100644
--- a/tensoradapter/pytorch/torch.cpp
+++ b/tensoradapter/pytorch/torch.cpp
@@ -28,7 +28,7 @@ TA_EXPORTS void CPURawDelete(void* ptr) {
 
 #ifdef DGL_USE_CUDA
 TA_EXPORTS void* CUDARawAlloc(size_t nbytes, cudaStream_t stream) {
-  at::globalContext().lazyInitCUDA();
+  at::globalContext().lazyInitDevice(at::kCUDA);
   return c10::cuda::CUDACachingAllocator::raw_alloc_with_stream(nbytes, stream);
 }
 