diff --git a/thrust/thrust/system/cuda/detail/assign_value.h b/thrust/thrust/system/cuda/detail/assign_value.h
index 395dce9341f..bc5f02b702c 100644
--- a/thrust/thrust/system/cuda/detail/assign_value.h
+++ b/thrust/thrust/system/cuda/detail/assign_value.h
@@ -38,60 +38,37 @@
 THRUST_NAMESPACE_BEGIN
 namespace cuda_cub
 {
-
 template <typename DerivedPolicy, typename Pointer1, typename Pointer2>
-inline _CCCL_HOST_DEVICE void
-assign_value(thrust::cuda::execution_policy<DerivedPolicy>& exec, Pointer1 dst, Pointer2 src)
+_CCCL_HOST_DEVICE void assign_value(execution_policy<DerivedPolicy>& exec, Pointer1 dst, Pointer2 src)
 {
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    _CCCL_HOST inline static void
-    host_path(thrust::cuda::execution_policy<DerivedPolicy>& exec, Pointer1 dst, Pointer2 src)
-    {
-      cuda_cub::copy(exec, src, src + 1, dst);
-    }
-
-    _CCCL_DEVICE inline static void
-    device_path(thrust::cuda::execution_policy<DerivedPolicy>&, Pointer1 dst, Pointer2 src)
-    {
-      *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src);
-    }
+  // Because of https://docs.nvidia.com/cuda/cuda-c-programming-guide/#cuda-arch point 2., if a call from a __host__
+  // __device__ function leads to the template instantiation of a __global__ function, then this instantiation needs to
+  // happen regardless of whether __CUDA_ARCH__ is defined. Therefore, we make the host path visible outside the
+  // NV_IF_TARGET switch. See also NVBug 881631.
+  auto host = [&]() {
+    cuda_cub::copy(exec, src, src + 1, dst);
   };
-
-  NV_IF_TARGET(
-    NV_IS_HOST, (war_nvbugs_881631::host_path(exec, dst, src);), (war_nvbugs_881631::device_path(exec, dst, src);));
-
-} // end assign_value()
+  NV_IF_TARGET(NV_IS_HOST, host();, *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src););
+}
 
 template <typename System1, typename System2, typename Pointer1, typename Pointer2>
-inline _CCCL_HOST_DEVICE void assign_value(cross_system<System1, System2>& systems, Pointer1 dst, Pointer2 src)
+_CCCL_HOST_DEVICE void assign_value(cross_system<System1, System2>& systems, Pointer1 dst, Pointer2 src)
 {
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    _CCCL_HOST inline static void host_path(cross_system<System1, System2>& systems, Pointer1 dst, Pointer2 src)
-    {
-      // rotate the systems so that they are ordered the same as (src, dst)
-      // for the call to thrust::copy
-      cross_system<System2, System1> rotated_systems = systems.rotate();
-      cuda_cub::copy(rotated_systems, src, src + 1, dst);
-    }
-
-    _CCCL_DEVICE inline static void device_path(cross_system<System1, System2>&, Pointer1 dst, Pointer2 src)
-    {
-      // XXX forward the true cuda::execution_policy inside systems here
-      //     instead of materializing a tag
-      thrust::cuda::tag cuda_tag;
-      thrust::cuda_cub::assign_value(cuda_tag, dst, src);
-    }
+  // Because of https://docs.nvidia.com/cuda/cuda-c-programming-guide/#cuda-arch point 2., if a call from a __host__
+  // __device__ function leads to the template instantiation of a __global__ function, then this instantiation needs to
+  // happen regardless of whether __CUDA_ARCH__ is defined. Therefore, we make the host path visible outside the
+  // NV_IF_TARGET switch. See also NVBug 881631.
+  auto host = [&]() {
+    // rotate the systems so that they are ordered the same as (src, dst) for the call to thrust::copy
+    cross_system<System2, System1> rotated_systems = systems.rotate();
+    cuda_cub::copy(rotated_systems, src, src + 1, dst);
   };
-
-  NV_IF_TARGET(NV_IS_HOST,
-               (war_nvbugs_881631::host_path(systems, dst, src);),
-               (war_nvbugs_881631::device_path(systems, dst, src);));
-} // end assign_value()
-
+  NV_IF_TARGET(NV_IS_HOST, host();
+               ,
+               (
+                 // XXX forward the true cuda::execution_policy inside systems here instead of materializing a tag
+                 cuda::tag cuda_tag; cuda_cub::assign_value(cuda_tag, dst, src);));
+}
 } // namespace cuda_cub
 THRUST_NAMESPACE_END
 #endif
diff --git a/thrust/thrust/system/cuda/detail/get_value.h b/thrust/thrust/system/cuda/detail/get_value.h
index 5d1bb7e4cb6..fdfa1eb2084 100644
--- a/thrust/thrust/system/cuda/detail/get_value.h
+++ b/thrust/thrust/system/cuda/detail/get_value.h
@@ -38,51 +38,23 @@
 THRUST_NAMESPACE_BEGIN
 namespace cuda_cub
 {
-
-namespace
-{
-
 template <typename DerivedPolicy, typename Pointer>
-inline _CCCL_HOST_DEVICE typename thrust::iterator_value<Pointer>::type
-get_value_msvc2005_war(execution_policy<DerivedPolicy>& exec, Pointer ptr)
+_CCCL_HOST_DEVICE iterator_value_t<Pointer> get_value(execution_policy<DerivedPolicy>& exec, Pointer ptr)
 {
-  using result_type = typename thrust::iterator_value<Pointer>::type;
-
-  // XXX war nvbugs/881631
-  struct war_nvbugs_881631
-  {
-    _CCCL_HOST inline static result_type host_path(execution_policy<DerivedPolicy>& exec, Pointer ptr)
-    {
-      // when called from host code, implement with assign_value
-      // note that this requires a type with default constructor
-      result_type result;
-
-      thrust::host_system_tag host_tag;
-      cross_system<thrust::host_system_tag, DerivedPolicy> systems(host_tag, exec);
-      assign_value(systems, &result, ptr);
-
-      return result;
-    }
-
-    _CCCL_DEVICE inline static result_type device_path(execution_policy<DerivedPolicy>&, Pointer ptr)
-    {
-      // when called from device code, just do simple deref
-      return *thrust::raw_pointer_cast(ptr);
-    }
+  // Because of https://docs.nvidia.com/cuda/cuda-c-programming-guide/#cuda-arch point 2., if a call from a __host__
+  // __device__ function leads to the template instantiation of a __global__ function, then this instantiation needs to
+  // happen regardless of whether __CUDA_ARCH__ is defined. Therefore, we make the host path visible outside the
+  // NV_IF_TARGET switch. See also NVBug 881631.
+  auto host = [&]() {
+    // implemented with assign_value, which requires a type with a default constructor
+    iterator_value_t<Pointer> result;
+    host_system_tag host_tag;
+    cross_system<host_system_tag, DerivedPolicy> systems(host_tag, exec);
+    assign_value(systems, &result, ptr);
+    return result;
   };
-
-  NV_IF_TARGET(
-    NV_IS_HOST, (return war_nvbugs_881631::host_path(exec, ptr);), (return war_nvbugs_881631::device_path(exec, ptr);))
-} // end get_value_msvc2005_war()
-} // namespace
-
-template <typename DerivedPolicy, typename Pointer>
-inline _CCCL_HOST_DEVICE typename thrust::iterator_value<Pointer>::type
-get_value(execution_policy<DerivedPolicy>& exec, Pointer ptr)
-{
-  return get_value_msvc2005_war(exec, ptr);
-} // end get_value()
-
+  NV_IF_TARGET(NV_IS_DEVICE, return *thrust::raw_pointer_cast(ptr);, return host();)
+}
 } // namespace cuda_cub
 THRUST_NAMESPACE_END