NVIDIA
diff --git a/‎ci/windows/build_common.psm1
+4-1 b/‎ci/windows/build_common.psm1
+4-1
diff --git a/‎libcudacxx/include/cuda/std/__tuple_dir/sfinae_helpers.h
+1-1 b/‎libcudacxx/include/cuda/std/__tuple_dir/sfinae_helpers.h
+1-1
diff --git a/‎libcudacxx/include/cuda/std/__type_traits/result_of.h
+3-2 b/‎libcudacxx/include/cuda/std/__type_traits/result_of.h
+3-2
diff --git a/‎libcudacxx/include/cuda/std/detail/libcxx/include/type_traits
-2 b/‎libcudacxx/include/cuda/std/detail/libcxx/include/type_traits
-2
diff --git a/‎thrust/examples/arbitrary_transformation.cu
+2 b/‎thrust/examples/arbitrary_transformation.cu
+2
diff --git a/‎thrust/examples/bounding_box.cu
+2 b/‎thrust/examples/bounding_box.cu
+2
diff --git a/‎thrust/examples/bucket_sort2d.cu
+2 b/‎thrust/examples/bucket_sort2d.cu
+2
diff --git a/‎thrust/examples/cuda/range_view.cu
+2 b/‎thrust/examples/cuda/range_view.cu
+2
diff --git a/‎thrust/examples/discrete_voronoi.cu
+1 b/‎thrust/examples/discrete_voronoi.cu
+1
diff --git a/‎thrust/examples/dot_products_with_zip.cu
+2 b/‎thrust/examples/dot_products_with_zip.cu
+2
diff --git a/‎thrust/examples/include/host_device.h
+29 b/‎thrust/examples/include/host_device.h
+29
diff --git a/‎thrust/examples/lambda.cu
+2 b/‎thrust/examples/lambda.cu
+2
diff --git a/‎thrust/examples/max_abs_diff.cu
+2 b/‎thrust/examples/max_abs_diff.cu
+2
diff --git a/‎thrust/examples/minmax.cu
+2 b/‎thrust/examples/minmax.cu
+2
diff --git a/‎thrust/examples/monte_carlo.cu
+2 b/‎thrust/examples/monte_carlo.cu
+2
diff --git a/‎thrust/examples/monte_carlo_disjoint_sequences.cu
+2 b/‎thrust/examples/monte_carlo_disjoint_sequences.cu
+2
diff --git a/‎thrust/examples/norm.cu
+2 b/‎thrust/examples/norm.cu
+2
diff --git a/‎thrust/examples/padded_grid_reduction.cu
+1 b/‎thrust/examples/padded_grid_reduction.cu
+1
diff --git a/‎thrust/examples/raw_reference_cast.cu
+2 b/‎thrust/examples/raw_reference_cast.cu
+2
diff --git a/‎thrust/examples/remove_points2d.cu
+2 b/‎thrust/examples/remove_points2d.cu
+2
diff --git a/‎thrust/examples/repeated_range.cu
+2 b/‎thrust/examples/repeated_range.cu
+2
diff --git a/‎thrust/examples/saxpy.cu
+2 b/‎thrust/examples/saxpy.cu
+2
diff --git a/‎thrust/examples/scan_by_key.cu
+2 b/‎thrust/examples/scan_by_key.cu
+2
diff --git a/‎thrust/examples/scan_matrix_by_rows.cu
+1 b/‎thrust/examples/scan_matrix_by_rows.cu
+1
diff --git a/‎thrust/examples/simple_moving_average.cu
+2 b/‎thrust/examples/simple_moving_average.cu
+2
diff --git a/‎thrust/examples/sort.cu
+2 b/‎thrust/examples/sort.cu
+2
diff --git a/‎thrust/examples/sorting_aos_vs_soa.cu
+1 b/‎thrust/examples/sorting_aos_vs_soa.cu
+1
diff --git a/‎thrust/examples/stream_compaction.cu
+2 b/‎thrust/examples/stream_compaction.cu
+2
diff --git a/‎thrust/examples/strided_range.cu
+2 b/‎thrust/examples/strided_range.cu
+2
diff --git a/‎thrust/examples/sum_rows.cu
+2 b/‎thrust/examples/sum_rows.cu
+2
diff --git a/‎thrust/examples/summary_statistics.cu
+2 b/‎thrust/examples/summary_statistics.cu
+2
diff --git a/‎thrust/examples/summed_area_table.cu
+2 b/‎thrust/examples/summed_area_table.cu
+2
diff --git a/‎thrust/examples/tiled_range.cu
+2 b/‎thrust/examples/tiled_range.cu
+2
diff --git a/‎thrust/examples/transform_input_output_iterator.cu
+2 b/‎thrust/examples/transform_input_output_iterator.cu
+2
diff --git a/‎thrust/examples/transform_iterator.cu
+2 b/‎thrust/examples/transform_iterator.cu
+2
diff --git a/‎thrust/examples/transform_output_iterator.cu
+1 b/‎thrust/examples/transform_output_iterator.cu
+1
diff --git a/‎thrust/examples/uninitialized_vector.cu
+2 b/‎thrust/examples/uninitialized_vector.cu
+2
diff --git a/‎thrust/examples/word_count.cu
+2 b/‎thrust/examples/word_count.cu
+2
@@ -73,7 +73,10 @@ function configure_preset {
     # CMake must be invoked in the same directory as the presets file:
     pushd ".."
 
-    cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE
+    # Echo and execute command to stdout:
+    $configure_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE"
+    Write-Host $configure_command
+    Invoke-Expression $configure_command
     $test_result = $LastExitCode
 
     If ($test_result -ne 0) {
 
@@ -42,7 +42,7 @@ template <bool... _Preds>
 struct __all_dummy;
 
 template <bool... _Pred>
-using __all = _IsSame<__all_dummy<_Pred...>, __all_dummy<((void) _Pred, true)...>>;
+using __all = is_same<__all_dummy<_Pred...>, __all_dummy<((void) _Pred, true)...>>;
 
 struct __tuple_sfinae_base
 {
 
@@ -28,10 +28,11 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD
 
 #if _CCCL_STD_VER <= 2017 || defined(_LIBCUDACXX_ENABLE_CXX20_REMOVED_TYPE_TRAITS)
 template <class _Callable>
-class _LIBCUDACXX_DEPRECATED_IN_CXX17 result_of;
+class result_of;
 
 template <class _Fp, class... _Args>
-class _LIBCUDACXX_TEMPLATE_VIS result_of<_Fp(_Args...)> : public __invoke_of<_Fp, _Args...>
+class _LIBCUDACXX_DEPRECATED_IN_CXX17 _LIBCUDACXX_TEMPLATE_VIS result_of<_Fp(_Args...)>
+    : public __invoke_of<_Fp, _Args...>
 {};
 
 #  if _CCCL_STD_VER > 2011
 
@@ -567,8 +567,6 @@ namespace std
 
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
 
-template <class _Tp>
-class _LIBCUDACXX_TEMPLATE_VIS reference_wrapper;
 template <class _Tp>
 struct _LIBCUDACXX_TEMPLATE_VIS hash;
 
 
@@ -10,6 +10,8 @@
 #  include <thrust/zip_function.h>
 #endif // >= C++11
 
+#include "include/host_device.h"
+
 // This example shows how to implement an arbitrary transformation of
 // the form output[i] = F(first[i], second[i], third[i], ... ).
 // In this example, we use a function with 3 inputs and 1 output.
 
@@ -4,6 +4,8 @@
 #include <thrust/random.h>
 #include <thrust/transform_reduce.h>
 
+#include "include/host_device.h"
+
 // This example shows how to compute a bounding box
 // for a set of points in two dimensions.
 
 
@@ -9,6 +9,8 @@
 #include <iomanip>
 #include <iostream>
 
+#include "include/host_device.h"
+
 // define a 2d float vector
 typedef thrust::tuple<float, float> vec2;
 
 
@@ -5,6 +5,8 @@
 
 #include <iostream>
 
+#include "../include/host_device.h"
+
 // This example demonstrates the use of a view: a non-owning wrapper for an
 // iterator range which presents a container-like interface to the user.
 //
 
@@ -10,6 +10,7 @@
 #include <iomanip>
 #include <iostream>
 
+#include "include/host_device.h"
 #include "include/timer.h"
 
 // Compute an approximate Voronoi Diagram with a Jump Flooding Algorithm (JFA)
 
@@ -5,6 +5,8 @@
 #include <thrust/random.h>
 #include <thrust/transform.h>
 
+#include "include/host_device.h"
+
 // This example shows how thrust::zip_iterator can be used to create a
 // 'virtual' array of structures.  In this case the structure is a 3d
 // vector type (Float3) whose (x,y,z) components will be stored in
 
@@ -0,0 +1,29 @@
+/*
+ *  Copyright 2008-2009 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+// Makes __host__ / __device__ expand to nothing when the device compiler is not NVCC (unless already defined).
+#if THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC
+// NOTE(review): no includes here; if THRUST_DEVICE_COMPILER* are undefined this is 0 != 0 and nothing is defined.
+#  ifndef __host__
+#    define __host__
+#  endif // __host__
+
+#  ifndef __device__
+#    define __device__
+#  endif // __device__
+
+#endif // THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC
@@ -4,6 +4,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // This example demonstrates the use of placeholders to implement
 // the SAXPY operation (i.e. Y[i] = a * X[i] + Y[i]).
 //
 
@@ -5,6 +5,8 @@
 #include <cmath>
 #include <iostream>
 
+#include "include/host_device.h"
+
 // this example computes the maximum absolute difference
 // between the elements of two vectors
 
 
@@ -5,6 +5,8 @@
 #include <thrust/random.h>
 #include <thrust/transform_reduce.h>
 
+#include "include/host_device.h"
+
 // compute minimum and maximum values in a single reduction
 
 // minmax_pair stores the minimum and maximum
 
@@ -7,6 +7,8 @@
 #include <iomanip>
 #include <iostream>
 
+#include "include/host_device.h"
+
 // we could vary M & N to find the perf sweet spot
 
 __host__ __device__ unsigned int hash(unsigned int a)
 
@@ -6,6 +6,8 @@
 #include <cmath>
 #include <iostream>
 
+#include "include/host_device.h"
+
 // The technique demonstrated in the example monte_carlo.cu
 // assigns an independently seeded random number generator to each
 // of 30K threads, and uses a hashing scheme based on thread index to
 
@@ -6,6 +6,8 @@
 #include <cmath>
 #include <iostream>
 
+#include "include/host_device.h"
+
 //   This example computes the norm [1] of a vector.  The norm is
 // computed by squaring all numbers in the vector, summing the
 // squares, and taking the square root of the sum of squares.  In
 
@@ -10,6 +10,7 @@
 #include <cmath>
 #include <iomanip>
 
+#include "include/host_device.h"
 #include <float.h>
 
 // This example computes the minimum and maximum values
 
@@ -5,6 +5,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // This example illustrates how to use the raw_reference_cast to convert
 // system-specific reference wrappers into native references.
 //
 
@@ -2,6 +2,8 @@
 #include <thrust/random.h>
 #include <thrust/remove.h>
 
+#include "include/host_device.h"
+
 // This example generates random points in the
 // unit square [0,1)x[0,1) and then removes all
 // points where x^2 + y^2 > 1
 
@@ -8,6 +8,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // this example illustrates how to make repeated access to a range of values
 // examples:
 //   repeated_range([0, 1, 2, 3], 1) -> [0, 1, 2, 3]
 
@@ -7,6 +7,8 @@
 #include <iostream>
 #include <iterator>
 
+#include "include/host_device.h"
+
 // This example illustrates how to implement the SAXPY
 // operation (Y[i] = a * X[i] + Y[i]) using Thrust.
 // The saxpy_slow function demonstrates the most
 
@@ -4,6 +4,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // BinaryPredicate for the head flag segment representation
 // equivalent to thrust::not2(thrust::project2nd<int,int>()));
 template <typename HeadFlagType>
 
@@ -4,6 +4,7 @@
 #include <thrust/scan.h>
 #include <thrust/sequence.h>
 
+#include "include/host_device.h"
 #include <assert.h>
 
 // We have a matrix stored in a `thrust::device_vector`. We want to perform a
 
@@ -8,6 +8,8 @@
 #include <iomanip>
 #include <iostream>
 
+#include "include/host_device.h"
+
 // Efficiently computes the simple moving average (SMA) [1] of a data series
 // using a parallel prefix-sum or "scan" operation.
 //
 
@@ -5,6 +5,8 @@
 #include <iomanip>
 #include <iostream>
 
+#include "include/host_device.h"
+
 // Helper routines
 
 void initialize(thrust::device_vector<int>& v)
 
@@ -3,6 +3,7 @@
 #include <thrust/random.h>
 #include <thrust/sort.h>
 
+#include "include/host_device.h"
 #include "include/timer.h"
 #include <assert.h>
 
 
@@ -8,6 +8,8 @@
 #include <iterator>
 #include <string>
 
+#include "include/host_device.h"
+
 // this functor returns true if the argument is odd, and false otherwise
 template <typename T>
 struct is_odd : public thrust::unary_function<T, bool>
 
@@ -8,6 +8,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // this example illustrates how to make strided access to a range of values
 // examples:
 //   strided_range([0, 1, 2, 3, 4, 5, 6], 1) -> [0, 1, 2, 3, 4, 5, 6]
 
@@ -7,6 +7,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // convert a linear index to a row index
 template <typename T>
 struct linear_index_to_row_index : public thrust::unary_function<T, T>
 
@@ -8,6 +8,8 @@
 #include <iostream>
 #include <limits>
 
+#include "include/host_device.h"
+
 // This example computes several statistical properties of a data
 // series in a single reduction.  The algorithm is described in detail here:
 // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
 
@@ -9,6 +9,8 @@
 #include <iomanip>
 #include <iostream>
 
+#include "include/host_device.h"
+
 // This example computes a summed area table using segmented scan
 // http://en.wikipedia.org/wiki/Summed_area_table
 
 
@@ -8,6 +8,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // this example illustrates how to tile a range multiple times
 // examples:
 //   tiled_range([0, 1, 2, 3], 1) -> [0, 1, 2, 3]
 
@@ -7,6 +7,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // Base 2 fixed point
 class ScaledInteger
 {
 
@@ -8,6 +8,8 @@
 #include <iterator>
 #include <string>
 
+#include "include/host_device.h"
+
 // this functor clamps a value to the range [lo, hi]
 template <typename T>
 struct clamp : public thrust::unary_function<T, T>
 
@@ -5,6 +5,7 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
 struct Functor
 {
   template <class Tuple>
 
@@ -11,6 +11,8 @@
 
 #include <cassert>
 
+#include "include/host_device.h"
+
 // uninitialized_allocator is an allocator which
 // derives from device_allocator and which has a
 // no-op construct member function
 
@@ -5,6 +5,8 @@
 
 #include <iostream>
 
+#include "include/host_device.h"
+
 // This example computes the number of words in a text sample
 // with a single call to thrust::inner_product.  The algorithm
 // counts the number of characters which start a new word, i.e.
Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ template <bool... _Preds>`
`42`	`42`	`struct __all_dummy;`
`43`	`43`
`44`	`44`	`template <bool... _Pred>`
`45`		`-using __all = _IsSame<__all_dummy<_Pred...>, __all_dummy<((void) _Pred, true)...>>;`
	`45`	`+using __all = is_same<__all_dummy<_Pred...>, __all_dummy<((void) _Pred, true)...>>;`
`46`	`46`
`47`	`47`	`struct __tuple_sfinae_base`
`48`	`48`	`{`
Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,8 @@`
`7`	`7`
`8`	`8`	`#include <iostream>`
`9`	`9`
	`10`	`+#include "include/host_device.h"`
	`11`	`+`
`10`	`12`	`// Base 2 fixed point`
`11`	`13`	`class ScaledInteger`
`12`	`14`	`{`
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,7 @@`
`5`	`5`
`6`	`6`	`#include <iostream>`
`7`	`7`
	`8`	`+#include "include/host_device.h"`
`8`	`9`	`struct Functor`
`9`	`10`	`{`
`10`	`11`	`template <class Tuple>`