Autoexposure example restoration #728

devshgraphicsprogramming · 2024-08-13T21:57:04Z

Description

Testing

TODO list:

…into autoexposue_ex

devshgraphicsprogramming · 2024-08-13T22:12:52Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+struct LumaMeteringWindow
+{
+	float32_t2 meteringWindowScale;
+	float32_t2 meteringWindowOffset;
+};
+
+template<uint32_t GroupSize, typename ValueAccessor, typename SharedAccessor, typename TexAccessor>
+struct geom_luma_meter {
+    using this_t = geom_luma_meter<GroupSize, ValueAccessor, SharedAccessor, TexAccessor>;


you want to split off the cross-C++-and-HLSL code like LumaMeteringWindow from GPU specific code like geom_luma_meter

also having structs with names and a namespace that has luma and meter in both is redundant, I'd accept luma_meter::MeteringWindow but everything else I'd kill the duplicates

devshgraphicsprogramming · 2024-08-16T14:06:25Z

include/nbl/builtin/hlsl/math/morton.hlsl

+#else
+template <typename T>
+constexpr T morton2d_mask(uint8_t _n)


why two different versions for HLSL and C++ ?

HLSL doesn't have constexpr, right?

add a #define NBL_CONSTEXPR_FUNC to https://github.com/Devsh-Graphics-Programming/Nabla/blob/master/include/nbl/builtin/hlsl/cpp_compat.hlsl

then you can do an empty define of NBL_CONSTEXPR_FUNC for HLSL_VERSION

devshgraphicsprogramming · 2024-08-16T14:12:49Z

include/nbl/builtin/hlsl/math/morton.hlsl

+template <typename T>
+T morton2d_mask(uint16_t _n)
+{
+    const static uint64_t mask[5] =
+    {
+        0x5555555555555555ull,
+        0x3333333333333333ull,
+        0x0F0F0F0F0F0F0F0Full,
+        0x00FF00FF00FF00FFull,
+        0x0000FFFF0000FFFFull
+    };
+    return (T)mask[_n];
+}
+
+template <typename T>
+T morton3d_mask(uint16_t _n)
+{
+    const static uint64_t mask[5] =
+    {
+        0x1249249249249249ull,
+        0x10C30C30C30C30C3ull,
+        0x010F00F00F00F00Full,
+        0x001F0000FF0000FFull,
+        0x001F00000000FFFFull
+    };
+    return (T)mask[_n];
+}
+template <typename T>
+T morton4d_mask(uint16_t _n)
+{
+    const static uint64_t mask[4] =
+    {
+        0x1111111111111111ull,
+        0x0303030303030303ull,
+        0x000F000F000F000Full,
+        0x000000FF000000FFull
+    };
+    return (T)mask[_n];
+}


couldn't you make them into

namespace morton { namespace impl { template<uint16_t Dims, typename T> struct mask; // now the partial specializations } }

with the masks as NBL_CONSTEXPR member variables

This way you can have

template<uint16_t Dims, typename T, uint16_t BitDepth> enable_if_t<is_scalar_v<T>&&is_integral_v<T>,T> decode(T x) { static_assert(BitDepth <= sizeof(T)*8); x = x & mask<Dims,T>::value[0]; .. rest of if-else statements }

devshgraphicsprogramming · 2024-08-16T14:21:37Z

include/nbl/builtin/hlsl/math/morton.hlsl

+template<typename T, uint32_t bitDepth = sizeof(T) * 8u>
+T morton2d_decode_x(T _morton) { return impl::morton2d_decode<T, bitDepth>(_morton); }
+template<typename T, uint32_t bitDepth = sizeof(T) * 8u>
+T morton2d_decode_y(T _morton) { return impl::morton2d_decode<T, bitDepth>(_morton >> 1); }
+
+template<typename T, uint32_t bitDepth = sizeof(T) * 8u>
+T morton2d_encode(T x, T y) { return impl::separate_bits_2d<T, bitDepth>(x) | (impl::separate_bits_2d<T, bitDepth>(y) << 1); }
+template<typename T, uint32_t bitDepth = sizeof(T) * 8u>
+T morton3d_encode(T x, T y, T z) { return impl::separate_bits_3d<T, bitDepth>(x) | (impl::separate_bits_3d<T, bitDepth>(y) << 1) | (impl::separate_bits_3d<T, bitDepth>(z) << 2); }
+template<typename T, uint32_t bitDepth = sizeof(T) * 8u>
+T morton4d_encode(T x, T y, T z, T w) { return impl::separate_bits_4d<T, bitDepth>(x) | (impl::separate_bits_4d<T, bitDepth>(y) << 1) | (impl::separate_bits_4d<T, bitDepth>(z) << 2) | (impl::separate_bits_4d<T, bitDepth>(w) << 3); }


imho these should be template<uint16_t Dims, typename T, uint16_t BitDepth=sizeof(T)*8>

because that way you only need to write the encode and decode once

template<uint16_t Dims, typename T, uint16_t BitDepth=sizeof(T)*8> vector<T,Dims> decode(const T _morton) { vector<T,Dims> retval; for (uint16_t i=0; i<Dims; i++) retval[i] = impl::decode<Dims,T,BitDepth>(_morton>>i); return retval; } template<uint16_t Dims, typename T, uint16_t BitDepth=sizeof(T)*8> T encode(const vector<T,Dims> coord) { T retval = impl::separate_bits<Dims,T,BitDepth>(coord[0]); for (uint16_t i=1; i<Dims; i++) retval |= impl::separate_bits<Dims,T,BitDepth>(coord[1])<<i; return retval; }

devshgraphicsprogramming · 2024-08-16T14:23:46Z

include/nbl/builtin/hlsl/tonemapper/operators.hlsl

+	static this_t create(float EV, float key = 0.18f, float Contrast = 1.f) {
+		this_t retval;
+		retval.gamma = Contrast;
+		retval.exposure = EV + log2(key * 0.77321666f);


did the GLSL have any comment (or git blame) about what the 0.77321666f is?

devshgraphicsprogramming · 2024-08-16T14:24:24Z

include/nbl/builtin/hlsl/tonemapper/operators.hlsl

+struct ReinhardParams
+{
+	using this_t = ReinhardParams;
+	static this_t create(float EV, float key = 0.18f, float WhitePointRelToEV = 16.f)
+	{
+		this_t retval;
+		retval.keyAndManualLinearExposure = key * exp2(EV);
+		retval.rcpWhite2 = 1.f / (WhitePointRelToEV * WhitePointRelToEV);
+		return retval;
+	}
+
+	float32_t keyAndManualLinearExposure;
+	float32_t rcpWhite2;
+};
+
+struct ACESParams


drop Params from the name

put the actual tonemapping as an operator()

[Optional] template them on float_t, so one has choice of using float16_t or float32_t

devshgraphicsprogramming · 2024-08-16T14:25:20Z

include/nbl/builtin/hlsl/tonemapper/operators.hlsl

+float32_t3 reinhard(ReinhardParams params, float32_t3 rawCIEXYZcolor)
+{
+	float32_t exposureFactors = params.keyAndManualLinearExposure;
+	float32_t exposedLuma = rawCIEXYZcolor.y * exposureFactors;
+	float32_t colorMultiplier = (exposureFactors * (1.0 + exposedLuma * params.rcpWhite2) / (1.0 + exposedLuma));
+	return rawCIEXYZcolor * colorMultiplier;
+}
+
+float32_t3 aces(ACESParams params, float32_t3 rawCIEXYZcolor)
+{


these should be operator()(const vector<float_t,3> rawCIEXYZcolor) inside the respective Reinhard and ACES structs

devshgraphicsprogramming · 2024-08-16T14:30:50Z

include/nbl/builtin/hlsl/tonemapper/operators.hlsl

+	float32_t3 a = v * (v + float32_t3(0.0245786, 0.0245786, 0.0245786)) - float32_t3(0.000090537, 0.000090537, 0.000090537);
+	float32_t3 b = v * (v * float32_t3(0.983729, 0.983729, 0.983729) + float32_t3(0.4329510, 0.4329510, 0.4329510)) + float32_t3(0.238081, 0.238081, 0.238081);


btw we have some promote thing made by @Przemog1 you can use for making vectors full of the same scalar

devshgraphicsprogramming · 2024-08-16T14:32:05Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+struct geom_luma_meter {
+    using this_t = geom_luma_meter<GroupSize, ValueAccessor, SharedAccessor, TexAccessor>;
+
+    static this_t create(NBL_REF_ARG(LumaMeteringWindow) window, float32_t lumaMinimum, float32_t lumaMaximum)


CONST_REF nor just REF

devshgraphicsprogramming · 2024-08-16T14:33:09Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        uint32_t2 coord = {
+            morton2d_decode_x(glsl::gl_LocalInvocationIndex()),
+            morton2d_decode_y(glsl::gl_LocalInvocationIndex())
+        };
+        uint32_t tid = workgroup::SubgroupContiguousIndex();


you should use the tid instead of LocalInvocationIndex for the morton code

devshgraphicsprogramming · 2024-08-16T14:40:44Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        uint32_t2 sampleIndex = coord * GroupSize + float32_t2(glsl::gl_SubgroupID() + 1, glsl::gl_SubgroupInvocationID() + 1);
+        float32_t luma = 0.0f;
+
+        if (sampleIndex.x <= sampleCount.x && sampleIndex.y <= sampleCount.y) {


the computation of sampleIndex makes no sense here.

Also you shouldn't be bound testing anything, every invocation must sample otherwise your histogram/average is wrong.

Please compute a normalized uv coordinate relative to a full texture like this

const float32_t2 uv = (tileOffset+float32_t2(coord))*window.scale/totalInvocationsInDispatch+window.offset;

where tileOffset is basically gl_WorkgroupID*gl_WorkgroupSize but passed in from the outside

and window.scale/totalInvocationsInDispatch is precomputed obviously

devshgraphicsprogramming · 2024-08-16T14:41:33Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        retval.minLuma = lumaMinimum;
+        retval.maxLuma = lumaMaximum;


typo, you've set the min and max equal to each other

P.S. its also more useful to take a precomputed minLumaLog2 and lumaLog2Range (diff between log of max and log of min)

devshgraphicsprogramming · 2024-08-16T14:43:30Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+    {
+        float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f));
+        float32_t2 samplePos = stride * sampleIndex;
+        float32_t2 uvPos = (samplePos + float32_t2(0.5f, 0.5f)) / viewportSize;


the windowOffset should have had the 0.5 texel shift inside itself, so viewportSize not needed

devshgraphicsprogramming · 2024-08-16T14:44:01Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f));
+        float32_t2 samplePos = stride * sampleIndex;


just compute the UV this way https://github.com/Devsh-Graphics-Programming/Nabla/pull/728/files#r1719941761

devshgraphicsprogramming · 2024-08-16T14:45:23Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f));
+        float32_t2 samplePos = stride * sampleIndex;
+        float32_t2 uvPos = (samplePos + float32_t2(0.5f, 0.5f)) / viewportSize;
+        float32_t3 color = colorspace::oetf::sRGB(tex.get(uvPos));


this is wrong, the HDR image you're reading (the noises example with FLOAT16 format) is scene referred in linear space

let the TextureAccessor worry about applying an EOTF if there is one needed.

devshgraphicsprogramming · 2024-08-16T14:46:45Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        float32_t2 samplePos = stride * sampleIndex;
+        float32_t2 uvPos = (samplePos + float32_t2(0.5f, 0.5f)) / viewportSize;
+        float32_t3 color = colorspace::oetf::sRGB(tex.get(uvPos));
+        float32_t luma = dot(colorspace::sRGBtoXYZ[1], color);


actually require/expect a TexAccessor::toXYZ static member float32_t3x3 variable :P

think of it as the TexAccessor telling everyone what the colorspace of the image is

devshgraphicsprogramming · 2024-08-16T14:50:29Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        luma = clamp(luma, minLuma, maxLuma);
+
+        return log2(luma / minLuma) / log2(maxLuma / minLuma);


you want to just do a max(log2(luma),minLumaLog2) here without any normalization

also rename the function to computeLumaLog2 or something

devshgraphicsprogramming · 2024-08-16T14:51:44Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+            luma = computeLuma(tex, sampleCount, sampleIndex, viewportSize);
+            float32_t lumaSum = reduction(luma, sdata);
+
+            sdata.workgroupExecutionAndMemoryBarrier();


not sure you need the barrier, the last invocation will have the reduction value already

actually every invocation will

devshgraphicsprogramming · 2024-08-16T14:54:08Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        }
+    }
+
+    LumaMeteringWindow window;


the window shouldn't be stored, just fed to gatherLuma, see my point about gatherLuma naming

devshgraphicsprogramming · 2024-08-16T14:55:55Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+                uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();
+
+                uint32_t lumaSumBitPattern = uint32_t(clamp((lumaSum - log2(minLuma)) * (log2(maxLuma) - log2(minLuma)), 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));
+
+                uint32_t3 workgroupSize = glsl::gl_WorkGroupSize();
+                uint32_t workgroupIndex = dot(uint32_t3(workgroupSize.y * workgroupSize.z, workgroupSize.z, 1), glsl::gl_WorkGroupID());
+
+                val.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern);


there's a number of bugs here that we went over on discord.

your lumaSum should be just a sum of max(log2(luma),minLumaLog2)

fixedPointBitsLeft needs a better name and needs to come from the outside (precomputed)

the bitpattern needs to map lumaSum==minLumaLog2*WorkgroupSize to 0 and lumaSum==maxLumaLog2*WorkgroupSize to (1<<fixedPointBitsLeft)-1

devshgraphicsprogramming · 2024-08-16T14:59:13Z

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

+        return log2(luma / minLuma) / log2(maxLuma / minLuma);
+    }
+
+    void gatherLuma(


I'd rename this to sampleLuma because you want another method thats called either gather (because whole workgroup gathers partial averages or histograms) or get to actually obtain your measurement in the second dispatch

I recommend having the getter in the same struct, because you need much of the state and precomputed constants to be able to joint and decode the measurements to produce a single EV value.
(you can copy paste your struct initialization from push constants or whatever in both the measurement and tonemapping shader)

…into autoexposue_ex

devshgraphicsprogramming · 2024-09-24T13:51:05Z

What are the action points before completion?

nipunG314 added 14 commits July 20, 2024 00:33

Add luma_meter and tonemapper

096e09d

Update submodule pointer

4fd700f

Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …

f93bb0f

…into autoexposue_ex

Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …

6152f96

…into autoexposue_ex

Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …

2311521

…into autoexposue_ex

Convert morton.h to hlsl

52e7ab2

Fix HLSL morton code

1cc26bd

Create geom_luma_meter and computeLuma

6922d0c

Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …

6e6eb64

…into autoexposue_ex

Add gatherLuma method

603a92f

Add getGatheredLuma()

810a6ac

Add reinhard and aces hlsl operators

69a73c1

Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …

72e0bc5

…into autoexposue_ex

cast mask values to correct type

4c70cf5

devshgraphicsprogramming commented Aug 13, 2024

View reviewed changes

nipunG314 added 2 commits August 16, 2024 16:35

Add create methods to tonemapper params

d9d6dd8

Remove getGatheredLuma from luma_meter

305f7e7

devshgraphicsprogramming commented Aug 16, 2024

View reviewed changes

nipunG314 added 13 commits August 20, 2024 18:28

Separate LumaMeteringWindow into a common header

3f4f6e9

Simplify luma_meter naming

515512a

Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …

77f5756

…into autoexposue_ex

Simplify morton code

1919e53

Add missing comment

4c58238

Refactor tonemapping operators

3c3f8b8

Small fixes

b0e0750

Use promote to simplify code

e8e46c9

Add static create to MeteringWindow

ee5affe

Infer sample count from viewportSize

56389f4

Rename gatherLuma, add toXYZ method and templatize the float type

23771d1

Add uploadFloat, downloadFloat and gatherLuma

ac39039

Normalize tileOffset and coord to uv before computing Luma

49a8049

Simplify return statement

8a10ae2

		float32_t3 a = v * (v + float32_t3(0.0245786, 0.0245786, 0.0245786)) - float32_t3(0.000090537, 0.000090537, 0.000090537);
		float32_t3 b = v * (v * float32_t3(0.983729, 0.983729, 0.983729) + float32_t3(0.4329510, 0.4329510, 0.4329510)) + float32_t3(0.238081, 0.238081, 0.238081);

		float32_t2 stride = window.meteringWindowScale / (sampleCount + float32_t2(1.0f, 1.0f));
		float32_t2 samplePos = stride * sampleIndex;

		luma = clamp(luma, minLuma, maxLuma);

		return log2(luma / minLuma) / log2(maxLuma / minLuma);

Autoexposure example restoration #728

Are you sure you want to change the base?

Autoexposure example restoration #728

Conversation

devshgraphicsprogramming commented Aug 13, 2024

Description

Testing

TODO list:

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

devshgraphicsprogramming Aug 16, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

devshgraphicsprogramming Aug 16, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

devshgraphicsprogramming Aug 16, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

devshgraphicsprogramming Aug 16, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

devshgraphicsprogramming commented Sep 24, 2024

devshgraphicsprogramming Aug 16, 2024 •

edited

Loading

devshgraphicsprogramming Aug 16, 2024 •

edited

Loading

devshgraphicsprogramming Aug 16, 2024 •

edited

Loading

devshgraphicsprogramming Aug 16, 2024 •

edited

Loading