Devsh-Graphics-Programming · devshgraphicsprogramming · Jan 16, 2025 · Nov 11, 2024 · Nov 12, 2024 · Nov 13, 2024
diff --git a/examples_tests b/examples_tests
diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl
@@ -5,8 +5,47 @@
 #ifndef _NBL_BUILTIN_HLSL_COMPLEX_INCLUDED_
 #define _NBL_BUILTIN_HLSL_COMPLEX_INCLUDED_
 
-#include "nbl/builtin/hlsl/functional.hlsl"
-#include "nbl/builtin/hlsl/cpp_compat/promote.hlsl"
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/functional.hlsl>
+
+using namespace nbl::hlsl;
+
+// -------------------------------------- CPP VERSION ------------------------------------
+#ifndef __HLSL_VERSION
+
+#include <complex>
+
+namespace nbl
+{
+namespace hlsl
+{
+
+template<typename Scalar>
+using complex_t = std::complex<Scalar>;
+
+// Fast multiplication by i, skipping the full complex product: (a + bi) * i = -b + ai
+template<typename Scalar>
+complex_t<Scalar> rotateLeft(NBL_CONST_REF_ARG(complex_t<Scalar>) value)
+{
+    // Constructor arguments are (real part, imaginary part)
+    return complex_t<Scalar>(-value.imag(), value.real());
+}
+
+// Fast multiplication by -i, skipping the full complex product: (a + bi) * (-i) = b - ai
+template<typename Scalar>
+complex_t<Scalar> rotateRight(NBL_CONST_REF_ARG(complex_t<Scalar>) value)
+{
+    // Constructor arguments are (real part, imaginary part)
+    return complex_t<Scalar>(value.imag(), -value.real());
+}
+
+}
+}
+
+// -------------------------------------- END CPP VERSION ------------------------------------
+
+// -------------------------------------- HLSL VERSION ---------------------------------------
+#else
 
 namespace nbl
 {
@@ -126,6 +165,8 @@ struct complex_t
 template<typename Scalar> 
 struct plus< complex_t<Scalar> > 
 {
+    using type_t = complex_t<Scalar>;
+
     complex_t<Scalar> operator()(NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs) 
     {
         return lhs + rhs;                                                             
@@ -137,6 +178,8 @@ struct plus< complex_t<Scalar> >
 template<typename Scalar> 
 struct minus< complex_t<Scalar> > 
 {
+    using type_t = complex_t<Scalar>;
+
     complex_t<Scalar> operator()(NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs) 
     {
         return lhs - rhs;                                                             
@@ -148,6 +191,8 @@ struct minus< complex_t<Scalar> >
 template<typename Scalar> 
 struct multiplies< complex_t<Scalar> > 
 {
+    using type_t = complex_t<Scalar>;
+
     complex_t<Scalar> operator()(NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs) 
     {
         return lhs * rhs;                                                             
@@ -164,6 +209,8 @@ struct multiplies< complex_t<Scalar> >
 template<typename Scalar> 
 struct divides< complex_t<Scalar> > 
 {
+    using type_t = complex_t<Scalar>;
+
     complex_t<Scalar> operator()(NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs) 
     {
         return lhs / rhs;                                                             
@@ -379,6 +426,22 @@ complex_t<Scalar> rotateRight(NBL_CONST_REF_ARG(complex_t<Scalar>) value)
     return retVal;
 }
 
+// Specialization for complex numbers: yields lhs when condition is true and rhs otherwise
+template<typename Scalar>
+struct ternary_operator< complex_t<Scalar> >
+{
+    using type_t = complex_t<Scalar>;
+
+    complex_t<Scalar> operator()(bool condition, NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs)
+    {
+        // Real and imaginary parts are selected separately, both driven by the same condition
+        const Scalar selectedReal = condition ? lhs.real() : rhs.real();
+        const Scalar selectedImag = condition ? lhs.imag() : rhs.imag();
+        const complex_t<Scalar> selected = { selectedReal, selectedImag };
+        return selected;
+    }
+};
+
 }
 }
 
@@ -396,4 +459,7 @@ NBL_REGISTER_OBJ_TYPE(complex_t<float64_t2>,::nbl::hlsl::alignment_of_v<float64_
 NBL_REGISTER_OBJ_TYPE(complex_t<float64_t3>,::nbl::hlsl::alignment_of_v<float64_t3>)
 NBL_REGISTER_OBJ_TYPE(complex_t<float64_t4>,::nbl::hlsl::alignment_of_v<float64_t4>)
 
+// -------------------------------------- END HLSL VERSION ---------------------------------------
 #endif
+
+#endif
diff --git a/include/nbl/builtin/hlsl/concepts/accessors/fft.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/fft.hlsl
@@ -0,0 +1,87 @@
+#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_FFT_INCLUDED_
+#define _NBL_BUILTIN_HLSL_CONCEPTS_ACCESSORS_FFT_INCLUDED_
+
+#include "nbl/builtin/hlsl/concepts.hlsl"
+#include "nbl/builtin/hlsl/fft/common.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace workgroup
+{
+namespace fft
+{
+// The SharedMemoryAccessor MUST provide the following methods, callable as `accessor.template get<uint32_t, uint32_t>(index, value)` (same for `set`):
+//      * void get(uint32_t index, inout uint32_t value);
+//      * void set(uint32_t index, in uint32_t value);
+//      * void workgroupExecutionAndMemoryBarrier();
+
+#define NBL_CONCEPT_NAME FFTSharedMemoryAccessor
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (T)
+#define NBL_CONCEPT_PARAM_0 (accessor, T)
+#define NBL_CONCEPT_PARAM_1 (index, uint32_t)
+#define NBL_CONCEPT_PARAM_2 (val, uint32_t)
+NBL_CONCEPT_BEGIN(3)
+#define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define index NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define val NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template set<uint32_t, uint32_t>(index, val)), is_same_v, void))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.template get<uint32_t, uint32_t>(index, val)), is_same_v, void))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.workgroupExecutionAndMemoryBarrier()), is_same_v, void))
+);
+#undef val
+#undef index
+#undef accessor
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+
+// The Accessor (for a small FFT) MUST provide the following methods:
+//     * void get(uint32_t index, inout complex_t<Scalar> value);
+//     * void set(uint32_t index, in complex_t<Scalar> value);
+
+#define NBL_CONCEPT_NAME SmallFFTAccessor
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(Scalar)
+#define NBL_CONCEPT_PARAM_0 (accessor, T)
+#define NBL_CONCEPT_PARAM_1 (index, uint32_t)
+#define NBL_CONCEPT_PARAM_2 (val, complex_t<Scalar>)
+NBL_CONCEPT_BEGIN(3)
+#define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define index NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define val NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.set(index, val)), is_same_v, void))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.get(index, val)), is_same_v, void))
+);
+#undef val
+#undef index
+#undef accessor
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+
+// The Accessor MUST provide the following methods:
+//     * void get(uint32_t index, inout complex_t<Scalar> value);
+//     * void set(uint32_t index, in complex_t<Scalar> value);
+//     * void memoryBarrier();
+
+#define NBL_CONCEPT_NAME FFTAccessor
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(Scalar)
+#define NBL_CONCEPT_PARAM_0 (accessor, T)
+NBL_CONCEPT_BEGIN(1)
+#define accessor NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((accessor.memoryBarrier()), is_same_v, void))
+) && SmallFFTAccessor<T, Scalar>;
+#undef accessor
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h
@@ -41,6 +41,7 @@ inline To _static_cast(From v)
 #define NBL_CONSTEXPR_STATIC constexpr static
 #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline
 #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline
+#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr
 #define NBL_CONST_MEMBER_FUNC const
 
 namespace nbl::hlsl
@@ -70,6 +71,7 @@ namespace nbl::hlsl
 #define NBL_CONSTEXPR_STATIC const static
 #define NBL_CONSTEXPR_STATIC_INLINE const static
 #define NBL_CONSTEXPR_INLINE_FUNC inline
+#define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline
 #define NBL_CONST_MEMBER_FUNC 
 
 namespace nbl

diff --git a/include/nbl/builtin/hlsl/fft/common.hlsl b/include/nbl/builtin/hlsl/fft/common.hlsl
@@ -1,58 +1,107 @@
 #ifndef _NBL_BUILTIN_HLSL_FFT_COMMON_INCLUDED_
 #define _NBL_BUILTIN_HLSL_FFT_COMMON_INCLUDED_
 
-#include "nbl/builtin/hlsl/complex.hlsl"
-#include "nbl/builtin/hlsl/cpp_compat.hlsl"
-#include "nbl/builtin/hlsl/numbers.hlsl"
+#include <nbl/builtin/hlsl/cpp_compat.hlsl>
+#include <nbl/builtin/hlsl/complex.hlsl>
+#include <nbl/builtin/hlsl/concepts.hlsl>
+#include <nbl/builtin/hlsl/math/intutil.hlsl>
+#include <nbl/builtin/hlsl/numbers.hlsl>
 
-namespace nbl 
+namespace nbl
 {
 namespace hlsl
 {
-namespace fft 
+namespace fft
 {
 
+// Pads the size of every dimension listed in "axes" up to the next power of two (PoT); dimensions not listed are left untouched.
+// N = number of dimensions of the input, M = number of entries in "axes" (M <= N). If realFFT is set, the first listed axis is also halved.
+// NOTE(review): the result is converted to a fixed 3-component vector, so N is presumably expected to be 3 - confirm against callers.
+template <uint16_t N, uint16_t M NBL_FUNC_REQUIRES(M <= N)
+inline vector<uint64_t, 3> padDimensions(NBL_CONST_REF_ARG(vector<uint32_t, N>) dimensions, NBL_CONST_REF_ARG(vector<uint16_t, M>) axes, bool realFFT = false)
+{
+    vector<uint32_t, N> newDimensions = dimensions;
+    for (uint16_t i = 0u; i < M; i++)
+    {
+        const uint16_t axis = axes[i]; // "axes" holds dimension indices: index the extents with it, not with the loop counter
+        newDimensions[axis] = hlsl::roundUpToPoT(newDimensions[axis]);
+        if (realFFT && i == 0u)
+            newDimensions[axis] /= 2;
+    }
+    return newDimensions;
+}
+
+// Returns the size in bytes of the FFT output buffer: the product of the three padded extents times numChannels, times the size of one complex element.
+// N = number of dimensions of the input, M = number of entries in "axes"; "axes" indicates which dimensions we run an FFT along AND store the result.
+// realFFT is forwarded to padDimensions (which halves one padded extent); halfFloats selects complex_t<float16_t> instead of complex_t<float32_t> elements.
+template <uint16_t N, uint16_t M NBL_FUNC_REQUIRES(M <= N)
+inline uint64_t getOutputBufferSize(NBL_CONST_REF_ARG(vector<uint32_t, N>) inputDimensions, uint32_t numChannels, NBL_CONST_REF_ARG(vector<uint16_t, M>) axes, bool realFFT = false, bool halfFloats = false)
+{
+    const vector<uint64_t, 3> paddedDims = padDimensions<N, M>(inputDimensions, axes, realFFT);
+    const uint64_t numberOfComplexElements = paddedDims[0] * paddedDims[1] * paddedDims[2] * uint64_t(numChannels);
+    return numberOfComplexElements * (halfFloats ? sizeof(complex_t<float16_t>) : sizeof(complex_t<float32_t>));
+}
+
 // Computes the kth element in the group of N roots of unity
 // Notice 0 <= k < N/2, rotating counterclockwise in the forward (DIF) transform and clockwise in the inverse (DIT)
 template<bool inverse, typename Scalar>
 complex_t<Scalar> twiddle(uint32_t k, uint32_t halfN)
 {
     complex_t<Scalar> retVal;
-    const Scalar kthRootAngleRadians = numbers::pi<Scalar> * Scalar(k) / Scalar(halfN);
-    retVal.real( cos(kthRootAngleRadians) );
-    if (! inverse)
-        retVal.imag( sin(-kthRootAngleRadians) );
+    const Scalar kthRootAngleRadians = numbers::pi<Scalar> *Scalar(k) / Scalar(halfN);
+    retVal.real(cos(kthRootAngleRadians));
+    if (!inverse)
+        retVal.imag(sin(-kthRootAngleRadians));
     else
-        retVal.imag( sin(kthRootAngleRadians) );
-    return retVal;                         
+        retVal.imag(sin(kthRootAngleRadians));
+    return retVal;
 }
 
-template<bool inverse, typename Scalar> 
-struct DIX 
-{ 
+template<bool inverse, typename Scalar>
+struct DIX
+{
     static void radix2(NBL_CONST_REF_ARG(complex_t<Scalar>) twiddle, NBL_REF_ARG(complex_t<Scalar>) lo, NBL_REF_ARG(complex_t<Scalar>) hi)
     {
         plus_assign< complex_t<Scalar> > plusAss;
         //Decimation in time - inverse           
         if (inverse) {
             complex_t<Scalar> wHi = twiddle * hi;
             hi = lo - wHi;
-            plusAss(lo, wHi);            
+            plusAss(lo, wHi);
         }
         //Decimation in frequency - forward   
         else {
             complex_t<Scalar> diff = lo - hi;
             plusAss(lo, hi);
-            hi = twiddle * diff; 
+            hi = twiddle * diff;
         }
-    }                                              
+    }
 };
 
 template<typename Scalar>
 using DIT = DIX<true, Scalar>;
 
 template<typename Scalar>
 using DIF = DIX<false, Scalar>;
+
+// ------------------------------------------------- Utils ---------------------------------------------------------
+// Util to unpack two values from the packed FFT X + iY - results are written back into the arguments: x goes to lo and y goes to hi
+template<typename Scalar>
+void unpack(NBL_REF_ARG(complex_t<Scalar>) lo, NBL_REF_ARG(complex_t<Scalar>) hi)
+{
+    const complex_t<Scalar> hiConjugate = conj(hi);
+    const complex_t<Scalar> halfSum = (lo + hiConjugate) * Scalar(0.5);
+    hi = rotateRight<Scalar>(lo - hiConjugate) * Scalar(0.5);
+    lo = halfSum;
+}
+
+// Bit-reverses `value` as a binary string of length `Bits`: all bits of T are reversed, then shifted down into the low `Bits` bits (Bits == 0 would shift by T's full width)
+template<typename T, uint16_t Bits NBL_FUNC_REQUIRES(is_integral_v<T> && Bits <= sizeof(T) * 8)
+T bitReverseAs(T value)
+{
+    return hlsl::bitReverse<T>(value) >> (sizeof(T) * 8 - Bits); // reverse at T's own width so it agrees with the sizeof(T)-based shift
+}
+
 }
 }
 }

diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl
@@ -165,7 +165,7 @@ COMPOUND_ASSIGN(divides)
 
 // ----------------- End of compound assignment ops ----------------
 
-// Min and Max don't use ALIAS_STD because they don't exist in STD
+// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD
 // TODO: implement as mix(rhs<lhs,lhs,rhs) (SPIR-V intrinsic from the extended set & glm on C++)
 template<typename T>
 struct minimum
@@ -195,6 +195,17 @@ struct maximum
     NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits<scalar_t>::lowest; // TODO: `all_components<T>`
 };
 
+// Functor form of the ternary operator: yields lhs when condition is true and rhs otherwise
+template<typename T>
+struct ternary_operator
+{
+    using type_t = T;
+    T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)
+    {
+        const T selected = condition ? lhs : rhs;
+        return selected;
+    }
+};
 }
 }
+24 −0		11_FFT/CMakeLists.txt
+164 −0		11_FFT/README.md
+13 −0		11_FFT/app_resources/common.hlsl
+76 −0		11_FFT/app_resources/shader.comp.hlsl
+28 −0		11_FFT/config.json.template
+338 −0		11_FFT/main.cpp
+50 −0		11_FFT/pipeline.groovy
+24 −0		28_FFTBloom/CMakeLists.txt
+51 −0		28_FFTBloom/app_resources/common.hlsl
+72 −0		28_FFTBloom/app_resources/fft_common.hlsl
+254 −0		28_FFTBloom/app_resources/fft_convolve_ifft.hlsl
+46 −0		28_FFTBloom/app_resources/fft_mirror_common.hlsl
+88 −0		28_FFTBloom/app_resources/image_fft_first_axis.hlsl
+153 −0		28_FFTBloom/app_resources/image_ifft_first_axis.hlsl
+84 −0		28_FFTBloom/app_resources/kernel_fft_first_axis.hlsl
+214 −0		28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl
+19 −0		28_FFTBloom/app_resources/kernel_spectrum_normalize.hlsl
+28 −0		28_FFTBloom/config.json.template
+1,323 −0		28_FFTBloom/main.cpp
+50 −0		28_FFTBloom/pipeline.groovy
+5 −1		CMakeLists.txt