Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modernize MSVC 2005/nvcc workaround #3606

Merged
merged 1 commit into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 22 additions & 36 deletions thrust/thrust/system/cuda/detail/assign_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,60 +38,46 @@
THRUST_NAMESPACE_BEGIN
namespace cuda_cub
{

template <typename DerivedPolicy, typename Pointer1, typename Pointer2>
inline _CCCL_HOST_DEVICE void
assign_value(thrust::cuda::execution_policy<DerivedPolicy>& exec, Pointer1 dst, Pointer2 src)
_CCCL_HOST_DEVICE void assign_value(execution_policy<DerivedPolicy>& exec, Pointer1 dst, Pointer2 src)
{
// XXX war nvbugs/881631
struct war_nvbugs_881631
// Because of https://docs.nvidia.com/cuda/cuda-c-programming-guide/#cuda-arch point 2., if a call from a __host__
// __device__ function leads to the template instantiation of a __global__ function, then this instantiation needs to
// happen regardless of whether __CUDA_ARCH__ is defined. Therefore, we make the host path visible outside the
// NV_IF_TARGET switch. See also NVBug 881631.
struct HostPath
{
_CCCL_HOST inline static void
host_path(thrust::cuda::execution_policy<DerivedPolicy>& exec, Pointer1 dst, Pointer2 src)
_CCCL_HOST auto operator()(execution_policy<DerivedPolicy>& exec, Pointer1 dst, Pointer2 src)
{
cuda_cub::copy(exec, src, src + 1, dst);
}

_CCCL_DEVICE inline static void
device_path(thrust::cuda::execution_policy<DerivedPolicy>&, Pointer1 dst, Pointer2 src)
{
*thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src);
}
};

NV_IF_TARGET(
NV_IS_HOST, (war_nvbugs_881631::host_path(exec, dst, src);), (war_nvbugs_881631::device_path(exec, dst, src);));

} // end assign_value()
NV_IS_HOST, (HostPath{}(exec, dst, src);), *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src););
}

template <typename System1, typename System2, typename Pointer1, typename Pointer2>
inline _CCCL_HOST_DEVICE void assign_value(cross_system<System1, System2>& systems, Pointer1 dst, Pointer2 src)
_CCCL_HOST_DEVICE void assign_value(cross_system<System1, System2>& systems, Pointer1 dst, Pointer2 src)
{
// XXX war nvbugs/881631
struct war_nvbugs_881631
// Because of https://docs.nvidia.com/cuda/cuda-c-programming-guide/#cuda-arch point 2., if a call from a __host__
// __device__ function leads to the template instantiation of a __global__ function, then this instantiation needs to
// happen regardless of whether __CUDA_ARCH__ is defined. Therefore, we make the host path visible outside the
// NV_IF_TARGET switch. See also NVBug 881631.
struct HostPath
{
_CCCL_HOST inline static void host_path(cross_system<System1, System2>& systems, Pointer1 dst, Pointer2 src)
_CCCL_HOST auto operator()(cross_system<System1, System2>& systems, Pointer1 dst, Pointer2 src)
{
// rotate the systems so that they are ordered the same as (src, dst)
// for the call to thrust::copy
// rotate the systems so that they are ordered the same as (src, dst) for the call to thrust::copy
cross_system<System2, System1> rotated_systems = systems.rotate();
cuda_cub::copy(rotated_systems, src, src + 1, dst);
}

_CCCL_DEVICE inline static void device_path(cross_system<System1, System2>&, Pointer1 dst, Pointer2 src)
{
// XXX forward the true cuda::execution_policy inside systems here
// instead of materializing a tag
thrust::cuda::tag cuda_tag;
thrust::cuda_cub::assign_value(cuda_tag, dst, src);
}
};

NV_IF_TARGET(NV_IS_HOST,
(war_nvbugs_881631::host_path(systems, dst, src);),
(war_nvbugs_881631::device_path(systems, dst, src);));
} // end assign_value()

(HostPath{}(systems, dst, src);),
(
// XXX forward the true cuda::execution_policy inside systems here instead of materializing a tag
cuda::tag cuda_tag; cuda_cub::assign_value(cuda_tag, dst, src);));
}
} // namespace cuda_cub
THRUST_NAMESPACE_END
#endif
50 changes: 13 additions & 37 deletions thrust/thrust/system/cuda/detail/get_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,51 +38,27 @@
THRUST_NAMESPACE_BEGIN
namespace cuda_cub
{

namespace
{

template <typename DerivedPolicy, typename Pointer>
inline _CCCL_HOST_DEVICE typename thrust::iterator_value<Pointer>::type
get_value_msvc2005_war(execution_policy<DerivedPolicy>& exec, Pointer ptr)
_CCCL_HOST_DEVICE iterator_value_t<Pointer> get_value(execution_policy<DerivedPolicy>& exec, Pointer ptr)
{
using result_type = typename thrust::iterator_value<Pointer>::type;

// XXX war nvbugs/881631
struct war_nvbugs_881631
// Because of https://docs.nvidia.com/cuda/cuda-c-programming-guide/#cuda-arch point 2., if a call from a __host__
// __device__ function leads to the template instantiation of a __global__ function, then this instantiation needs to
// happen regardless of whether __CUDA_ARCH__ is defined. Therefore, we make the host path visible outside the
// NV_IF_TARGET switch. See also NVBug 881631.
struct HostPath
{
_CCCL_HOST inline static result_type host_path(execution_policy<DerivedPolicy>& exec, Pointer ptr)
_CCCL_HOST auto operator()(execution_policy<DerivedPolicy>& exec, Pointer ptr)
{
// when called from host code, implement with assign_value
// note that this requires a type with default constructor
result_type result;

thrust::host_system_tag host_tag;
cross_system<thrust::host_system_tag, DerivedPolicy> systems(host_tag, exec);
// implemented with assign_value, which requires a type with a default constructor
iterator_value_t<Pointer> result;
host_system_tag host_tag;
cross_system<host_system_tag, DerivedPolicy> systems(host_tag, exec);
assign_value(systems, &result, ptr);

return result;
}

_CCCL_DEVICE inline static result_type device_path(execution_policy<DerivedPolicy>&, Pointer ptr)
{
// when called from device code, just do simple deref
return *thrust::raw_pointer_cast(ptr);
}
};

NV_IF_TARGET(
NV_IS_HOST, (return war_nvbugs_881631::host_path(exec, ptr);), (return war_nvbugs_881631::device_path(exec, ptr);))
} // end get_value_msvc2005_war()
} // namespace

template <typename DerivedPolicy, typename Pointer>
inline _CCCL_HOST_DEVICE typename thrust::iterator_value<Pointer>::type
get_value(execution_policy<DerivedPolicy>& exec, Pointer ptr)
{
return get_value_msvc2005_war(exec, ptr);
} // end get_value()

NV_IF_TARGET(NV_IS_DEVICE, return *thrust::raw_pointer_cast(ptr);, (return HostPath{}(exec, ptr);))
}
} // namespace cuda_cub
THRUST_NAMESPACE_END

Expand Down
Loading