Jolt: Update to commit f094082aa, adding RISC-V, PPC64 and LoongArch support

Fixes #100557.

(cherry picked from commit 4727f07)
Redot-Engine · Dec 22, 2024 · e57fb17 · e57fb17
1 parent 2f0d25a
commit e57fb17
Show file tree

Hide file tree

Showing 49 changed files with 346 additions and 162 deletions.
diff --git a/modules/jolt_physics/config.py b/modules/jolt_physics/config.py
@@ -1,5 +1,5 @@
 def can_build(env, platform):
-    return not env["disable_3d"]
+    return not env["disable_3d"] and not env["arch"] == "ppc32"
 
 
 def configure(env):

diff --git a/thirdparty/README.md b/thirdparty/README.md
@@ -424,7 +424,7 @@ Files generated from upstream source:
 ## jolt_physics
 
 - Upstream: https://github.com/jrouwe/JoltPhysics
-- Version: 5.2.1 (e3d3cdf644389b621914bb6e73d52ee3137591a7, 2024)
+- Version: 5.2.1 (f094082aa2bbfcbebc725dbe8b8f65c7d5152886, 2024)
 - License: MIT
 
 Files extracted from upstream source:

diff --git a/thirdparty/jolt_physics/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h b/thirdparty/jolt_physics/Jolt/AABBTree/NodeCodec/NodeCodecQuadTreeHalfFloat.h
@@ -254,6 +254,15 @@ class NodeCodecQuadTreeHalfFloat
 					const Node *node = reinterpret_cast<const Node *>(inBufferStart + (node_properties << OFFSET_NON_SIGNIFICANT_BITS));
 
 					// Unpack bounds
+				#ifdef JPH_CPU_BIG_ENDIAN
+					Vec4 bounds_minx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinX[0] + (node->mBoundsMinX[1] << 16), node->mBoundsMinX[2] + (node->mBoundsMinX[3] << 16), 0, 0));
+					Vec4 bounds_miny = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinY[0] + (node->mBoundsMinY[1] << 16), node->mBoundsMinY[2] + (node->mBoundsMinY[3] << 16), 0, 0));
+					Vec4 bounds_minz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMinZ[0] + (node->mBoundsMinZ[1] << 16), node->mBoundsMinZ[2] + (node->mBoundsMinZ[3] << 16), 0, 0));
+
+					Vec4 bounds_maxx = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxX[0] + (node->mBoundsMaxX[1] << 16), node->mBoundsMaxX[2] + (node->mBoundsMaxX[3] << 16), 0, 0));
+					Vec4 bounds_maxy = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxY[0] + (node->mBoundsMaxY[1] << 16), node->mBoundsMaxY[2] + (node->mBoundsMaxY[3] << 16), 0, 0));
+					Vec4 bounds_maxz = HalfFloatConversion::ToFloat(UVec4(node->mBoundsMaxZ[0] + (node->mBoundsMaxZ[1] << 16), node->mBoundsMaxZ[2] + (node->mBoundsMaxZ[3] << 16), 0, 0));
+				#else
 					UVec4 bounds_minxy = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMinX[0]));
 					Vec4 bounds_minx = HalfFloatConversion::ToFloat(bounds_minxy);
 					Vec4 bounds_miny = HalfFloatConversion::ToFloat(bounds_minxy.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
@@ -265,6 +274,7 @@ class NodeCodecQuadTreeHalfFloat
 					UVec4 bounds_maxyz = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&node->mBoundsMaxY[0]));
 					Vec4 bounds_maxy = HalfFloatConversion::ToFloat(bounds_maxyz);
 					Vec4 bounds_maxz = HalfFloatConversion::ToFloat(bounds_maxyz.Swizzle<SWIZZLE_Z, SWIZZLE_W, SWIZZLE_UNUSED, SWIZZLE_UNUSED>());
+				#endif
 
 					// Load properties for 4 children
 					UVec4 properties = UVec4::sLoadInt4(&node->mNodeProperties[0]);

diff --git a/thirdparty/jolt_physics/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h b/thirdparty/jolt_physics/Jolt/AABBTree/TriangleCodec/TriangleCodecIndexed8BitPackSOA4Flags.h
@@ -338,7 +338,7 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 	class DecodingContext
 	{
 	private:
-		/// Private helper functions to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
+		/// Private helper function to unpack the 1 vertex of 4 triangles (outX contains the x coordinate of triangle 0 .. 3 etc.)
 		JPH_INLINE void				Unpack(const VertexData *inVertices, UVec4Arg inIndex, Vec4 &outX, Vec4 &outY, Vec4 &outZ) const
 		{
 			// Get compressed data
@@ -356,6 +356,28 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 			outZ = Vec4::sFusedMultiplyAdd(zc.ToFloat(), mScaleZ, mOffsetZ);
 		}
 
+		/// Private helper function to unpack 4 triangles from a triangle block
+		JPH_INLINE void				Unpack(const TriangleBlock *inBlock, const VertexData *inVertices, Vec4 &outX1, Vec4 &outY1, Vec4 &outZ1, Vec4 &outX2, Vec4 &outY2, Vec4 &outZ2, Vec4 &outX3, Vec4 &outY3, Vec4 &outZ3) const
+		{
+			// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
+			UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&inBlock->mIndices[0]));
+			UVec4 iv1 = indices.Expand4Byte0();
+			UVec4 iv2 = indices.Expand4Byte4();
+			UVec4 iv3 = indices.Expand4Byte8();
+
+		#ifdef JPH_CPU_BIG_ENDIAN
+			// On big endian systems we need to reverse the bytes
+			iv1 = iv1.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
+			iv2 = iv2.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
+			iv3 = iv3.Swizzle<SWIZZLE_W, SWIZZLE_Z, SWIZZLE_Y, SWIZZLE_X>();
+		#endif
+
+			// Decompress the triangle data
+			Unpack(inVertices, iv1, outX1, outY1, outZ1);
+			Unpack(inVertices, iv2, outX2, outY2, outZ2);
+			Unpack(inVertices, iv3, outX3, outY3, outZ3);
+		}
+
 	public:
 		JPH_INLINE explicit			DecodingContext(const TriangleHeader *inHeader) :
 			mOffsetX(Vec4::sReplicate(inHeader->mOffset.x)),
@@ -380,17 +402,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 
 			do
 			{
-				// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
-				UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
-				UVec4 iv1 = indices.Expand4Byte0();
-				UVec4 iv2 = indices.Expand4Byte4();
-				UVec4 iv3 = indices.Expand4Byte8();
-
-				// Decompress the triangle data
+				// Unpack the vertices for 4 triangles
 				Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
-				Unpack(vertices, iv1, v1x, v1y, v1z);
-				Unpack(vertices, iv2, v2x, v2y, v2z);
-				Unpack(vertices, iv3, v3x, v3y, v3z);
+				Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
 
 				// Transpose it so we get normal vectors
 				Mat44 v1 = Mat44(v1x, v1y, v1z, Vec4::sZero()).Transposed();
@@ -425,17 +439,9 @@ class TriangleCodecIndexed8BitPackSOA4Flags
 			UVec4 start_triangle_idx = UVec4::sZero();
 			do
 			{
-				// Get the indices for the three vertices (reads 4 bytes extra, but these are the flags so that's ok)
-				UVec4 indices = UVec4::sLoadInt4(reinterpret_cast<const uint32 *>(&t->mIndices[0]));
-				UVec4 iv1 = indices.Expand4Byte0();
-				UVec4 iv2 = indices.Expand4Byte4();
-				UVec4 iv3 = indices.Expand4Byte8();
-
-				// Decompress the triangle data
+				// Unpack the vertices for 4 triangles
 				Vec4 v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z;
-				Unpack(vertices, iv1, v1x, v1y, v1z);
-				Unpack(vertices, iv2, v2x, v2y, v2z);
-				Unpack(vertices, iv3, v3x, v3y, v3z);
+				Unpack(t, vertices, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
 
 				// Perform ray vs triangle test
 				Vec4 distance = RayTriangle4(inRayOrigin, inRayDirection, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);

diff --git a/thirdparty/jolt_physics/Jolt/ConfigurationString.h b/thirdparty/jolt_physics/Jolt/ConfigurationString.h
@@ -14,8 +14,23 @@ inline const char *GetConfigurationString()
 		"x86 "
 #elif defined(JPH_CPU_ARM)
 		"ARM "
-#elif defined(JPH_PLATFORM_WASM)
+#elif defined(JPH_CPU_RISCV)
+		"RISC-V "
+#elif defined(JPH_CPU_PPC)
+		"PowerPC "
+	#ifdef JPH_CPU_BIG_ENDIAN
+		"(Big Endian) "
+	#else
+		"(Little Endian) "
+	#endif
+#elif defined(JPH_CPU_LOONGARCH)
+		"LoongArch "
+#elif defined(JPH_CPU_E2K)
+		"E2K "
+#elif defined(JPH_CPU_WASM)
 		"WASM "
+#else
+	#error Unknown CPU architecture
 #endif
 #if JPH_CPU_ADDRESS_BITS == 64
 		"64-bit "

diff --git a/thirdparty/jolt_physics/Jolt/Core/Array.h b/thirdparty/jolt_physics/Jolt/Core/Array.h
@@ -59,15 +59,15 @@ class [[nodiscard]] Array : private Allocator
 			{
 				for (T *destination_end = inDestination + inCount; inDestination < destination_end; ++inDestination, ++inSource)
 				{
-					::new (inDestination) T(std::move(*inSource));
+					new (inDestination) T(std::move(*inSource));
 					inSource->~T();
 				}
 			}
 			else
 			{
 				for (T *destination = inDestination + inCount - 1, *source = inSource + inCount - 1; destination >= inDestination; --destination, --source)
 				{
-					::new (destination) T(std::move(*source));
+					new (destination) T(std::move(*source));
 					source->~T();
 				}
 			}
@@ -124,7 +124,7 @@ class [[nodiscard]] Array : private Allocator
 
 		if constexpr (!std::is_trivially_constructible<T>())
 			for (T *element = mElements + mSize, *element_end = mElements + inNewSize; element < element_end; ++element)
-				::new (element) T;
+				new (element) T;
 		mSize = inNewSize;
 	}
 
@@ -137,7 +137,7 @@ class [[nodiscard]] Array : private Allocator
 		reserve(inNewSize);
 
 		for (T *element = mElements + mSize, *element_end = mElements + inNewSize; element < element_end; ++element)
-			::new (element) T(inValue);
+			new (element) T(inValue);
 		mSize = inNewSize;
 	}
 
@@ -187,7 +187,7 @@ class [[nodiscard]] Array : private Allocator
 		reserve(size_type(std::distance(inBegin, inEnd)));
 
 		for (Iterator element = inBegin; element != inEnd; ++element)
-			::new (&mElements[mSize++]) T(*element);
+			new (&mElements[mSize++]) T(*element);
 	}
 
 	/// Replace the contents of this array with inList
@@ -197,7 +197,7 @@ class [[nodiscard]] Array : private Allocator
 		reserve(size_type(inList.size()));
 
 		for (const T &v : inList)
-			::new (&mElements[mSize++]) T(v);
+			new (&mElements[mSize++]) T(v);
 	}
 
 	/// Default constructor
@@ -281,15 +281,15 @@ class [[nodiscard]] Array : private Allocator
 		grow();
 
 		T *element = mElements + mSize++;
-		::new (element) T(inValue);
+		new (element) T(inValue);
 	}
 
 	inline void				push_back(T &&inValue)
 	{
 		grow();
 
 		T *element = mElements + mSize++;
-		::new (element) T(std::move(inValue));
+		new (element) T(std::move(inValue));
 	}
 
 	/// Construct element at the back of the array
@@ -299,7 +299,7 @@ class [[nodiscard]] Array : private Allocator
 		grow();
 
 		T *element = mElements + mSize++;
-		::new (element) T(std::forward<A>(inValue)...);
+		new (element) T(std::forward<A>(inValue)...);
 		return *element;
 	}
 
@@ -365,7 +365,7 @@ class [[nodiscard]] Array : private Allocator
 			move(element_end, element_begin, mSize - first_element);
 
 			for (T *element = element_begin; element < element_end; ++element, ++inBegin)
-				::new (element) T(*inBegin);
+				new (element) T(*inBegin);
 
 			mSize += num_elements;
 		}
@@ -383,7 +383,7 @@ class [[nodiscard]] Array : private Allocator
 		T *element = mElements + first_element;
 		move(element + 1, element, mSize - first_element);
 
-		::new (element) T(inValue);
+		new (element) T(inValue);
 		mSize++;
 	}
 

diff --git a/thirdparty/jolt_physics/Jolt/Core/ByteBuffer.h b/thirdparty/jolt_physics/Jolt/Core/ByteBuffer.h
@@ -41,7 +41,7 @@ class ByteBuffer : public ByteBufferVector
 
 		// Construct elements
 		for (Type *d = data, *d_end = data + inSize; d < d_end; ++d)
-			::new (d) Type;
+			new (d) Type;
 
 		// Return pointer
 		return data;

diff --git a/thirdparty/jolt_physics/Jolt/Core/Core.h b/thirdparty/jolt_physics/Jolt/Core/Core.h
@@ -180,6 +180,18 @@
 		#define JPH_VECTOR_ALIGNMENT 8 // 32-bit ARM does not support aligning on the stack on 16 byte boundaries
 		#define JPH_DVECTOR_ALIGNMENT 8
 	#endif
+#elif defined(__riscv)
+	// RISC-V CPU architecture
+	#define JPH_CPU_RISCV
+	#if __riscv_xlen == 64
+		#define JPH_CPU_ADDRESS_BITS 64
+		#define JPH_VECTOR_ALIGNMENT 16
+		#define JPH_DVECTOR_ALIGNMENT 32
+	#else
+		#define JPH_CPU_ADDRESS_BITS 32
+		#define JPH_VECTOR_ALIGNMENT 16
+		#define JPH_DVECTOR_ALIGNMENT 8
+	#endif
 #elif defined(JPH_PLATFORM_WASM)
 	// WebAssembly CPU architecture
 	#define JPH_CPU_WASM
@@ -191,6 +203,29 @@
 		#define JPH_USE_SSE4_1
 		#define JPH_USE_SSE4_2
 	#endif
+#elif defined(__powerpc__) || defined(__powerpc64__)
+	// PowerPC CPU architecture
+	#define JPH_CPU_PPC
+	#if defined(__powerpc64__)
+		#define JPH_CPU_ADDRESS_BITS 64
+	#else
+		#define JPH_CPU_ADDRESS_BITS 32
+	#endif
+	#ifdef _BIG_ENDIAN
+		#define JPH_CPU_BIG_ENDIAN
+	#endif
+	#define JPH_VECTOR_ALIGNMENT 16
+	#define JPH_DVECTOR_ALIGNMENT 8
+#elif defined(__loongarch__)
+	// LoongArch CPU architecture
+	#define JPH_CPU_LOONGARCH
+	#if defined(__loongarch64)
+		#define JPH_CPU_ADDRESS_BITS 64
+	#else
+		#define JPH_CPU_ADDRESS_BITS 32
+	#endif
+	#define JPH_VECTOR_ALIGNMENT 16
+	#define JPH_DVECTOR_ALIGNMENT 8
 #elif defined(__e2k__)
 	// E2K CPU architecture (MCST Elbrus 2000)
 	#define JPH_CPU_E2K
@@ -358,10 +393,10 @@
 #elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS) || defined(JPH_PLATFORM_FREEBSD)
 	#if defined(JPH_CPU_X86)
 		#define JPH_BREAKPOINT	__asm volatile ("int $0x3")
-	#elif defined(JPH_CPU_ARM)
-		#define JPH_BREAKPOINT	__builtin_trap()
-	#elif defined(JPH_CPU_E2K)
+	#elif defined(JPH_CPU_ARM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_E2K) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 		#define JPH_BREAKPOINT	__builtin_trap()
+	#else
+		#error Unknown CPU architecture
 	#endif
 #elif defined(JPH_PLATFORM_WASM)
 	#define JPH_BREAKPOINT		do { } while (false) // Not supported

diff --git a/thirdparty/jolt_physics/Jolt/Core/FPControlWord.h b/thirdparty/jolt_physics/Jolt/Core/FPControlWord.h
@@ -126,6 +126,14 @@ class FPControlWord : public NonCopyable
 	uint32		mPrevState;
 };
 
+#elif defined(JPH_CPU_RISCV)
+
+// RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions.
+
+#elif defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
+
+// Not implemented right now
+
 #else
 
 #error Unsupported CPU architecture

diff --git a/thirdparty/jolt_physics/Jolt/Core/FPException.h b/thirdparty/jolt_physics/Jolt/Core/FPException.h
@@ -56,6 +56,14 @@ class FPExceptionDisableInvalid : public FPControlWord<0, FP_IOE> { };
 /// Disable division by zero floating point exceptions
 class FPExceptionDisableDivByZero : public FPControlWord<0, FP_DZE> { };
 
+#elif defined(JPH_CPU_RISCV)
+
+#error "RISC-V only implements manually checking if exceptions occurred by reading the fcsr register. It doesn't generate exceptions. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled."
+
+#elif defined(JPH_CPU_PPC)
+
+#error PowerPC floating point exception handling to be implemented. JPH_FLOATING_POINT_EXCEPTIONS_ENABLED must be disabled.
+
 #else
 
 #error Unsupported CPU architecture

diff --git a/thirdparty/jolt_physics/Jolt/Core/FPFlushDenormals.h b/thirdparty/jolt_physics/Jolt/Core/FPFlushDenormals.h
@@ -8,7 +8,7 @@
 
 JPH_NAMESPACE_BEGIN
 
-#if defined(JPH_CPU_WASM)
+#if defined(JPH_CPU_WASM) || defined(JPH_CPU_RISCV) || defined(JPH_CPU_PPC) || defined(JPH_CPU_LOONGARCH)
 
 // Not supported
 class FPFlushDenormals { };
@@ -21,6 +21,8 @@ class FPFlushDenormals : public FPControlWord<_MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_
 
 #elif defined(JPH_CPU_ARM) && defined(JPH_COMPILER_MSVC)
 
+/// Helper class that needs to be put on the stack to enable flushing denormals to zero
+/// This can make floating point operations much faster when working with very small numbers
 class FPFlushDenormals : public FPControlWord<_DN_FLUSH, _MCW_DN> { };
 
 #elif defined(JPH_CPU_ARM)

diff --git a/thirdparty/jolt_physics/Jolt/Core/FixedSizeFreeList.inl b/thirdparty/jolt_physics/Jolt/Core/FixedSizeFreeList.inl
@@ -79,7 +79,7 @@ uint32 FixedSizeFreeList<Object>::ConstructObject(Parameters &&... inParameters)
 			// Allocation successful
 			JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_sub(1, memory_order_relaxed);)
 			ObjectStorage &storage = GetStorage(first_free);
-			::new (&storage.mObject) Object(std::forward<Parameters>(inParameters)...);
+			new (&storage.mObject) Object(std::forward<Parameters>(inParameters)...);
 			storage.mNextFreeObject.store(first_free, memory_order_release);
 			return first_free;
 		}
@@ -97,7 +97,7 @@ uint32 FixedSizeFreeList<Object>::ConstructObject(Parameters &&... inParameters)
 				// Allocation successful
 				JPH_IF_ENABLE_ASSERTS(mNumFreeObjects.fetch_sub(1, memory_order_relaxed);)
 				ObjectStorage &storage = GetStorage(first_free);
-				::new (&storage.mObject) Object(std::forward<Parameters>(inParameters)...);
+				new (&storage.mObject) Object(std::forward<Parameters>(inParameters)...);
 				storage.mNextFreeObject.store(first_free, memory_order_release);
 				return first_free;
 			}