-
Notifications
You must be signed in to change notification settings - Fork 59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Acceleration Structure Conversion #790
base: master
Are you sure you want to change the base?
Conversation
Note that pointer/build param encoding stuff shouldn't be in the CPU side but don't touch anything. Also fix a typo, change the SRange to a std::span, and add default SPIR-V optimizer if none provided to asset converter.
…their storage. Change more stuff to span in `ICPUBottomLevelAccelerationStructure` Use a semantically better typedef/alias in `ILogicalDevice::createBottomLevelAccelerationStructure`
// finally the contents | ||
//TODO: hasher << lookup.asset->getContentHash(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note to self, need to make the ICPUBottomLevelAccelerationStructure
and IPreHashed
{asset,uniqueCopyGroupID}, | ||
patch | ||
}; | ||
if (!visitor()) | ||
continue; | ||
const auto instanceCount = as->getInstances().size(); | ||
sizes = device->getAccelerationStructureBuildSizes(patch.hostBuild,buildFlags,motionBlur,instanceCount); | ||
inputSize = (motionBlur ? sizeof(IGPUTopLevelAccelerationStructure::DevicePolymorphicInstance):sizeof(IGPUTopLevelAccelerationStructure::DeviceStaticInstance))*instanceCount; | ||
} | ||
else | ||
{ | ||
const uint32_t* pMaxPrimitiveCounts = as->getGeometryPrimitiveCounts().data(); | ||
// the code here is not pretty, but DRY-ing this is for later | ||
if (buildFlags.hasFlags(ICPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT)) | ||
{ | ||
const auto geoms = as->getAABBGeometries(); | ||
if (patch.hostBuild) | ||
{ | ||
const std::span<const IGPUBottomLevelAccelerationStructure::Triangles<const IGPUBuffer>> cpuGeoms = { | ||
reinterpret_cast<const IGPUBottomLevelAccelerationStructure::Triangles<const IGPUBuffer>*>(geoms.data()),geoms.size() | ||
}; | ||
sizes = device->getAccelerationStructureBuildSizes(buildFlags,motionBlur,cpuGeoms,pMaxPrimitiveCounts); | ||
} | ||
else | ||
{ | ||
const std::span<const IGPUBottomLevelAccelerationStructure::Triangles<const ICPUBuffer>> cpuGeoms = { | ||
reinterpret_cast<const IGPUBottomLevelAccelerationStructure::Triangles<const ICPUBuffer>*>(geoms.data()),geoms.size() | ||
}; | ||
sizes = device->getAccelerationStructureBuildSizes(buildFlags,motionBlur,cpuGeoms,pMaxPrimitiveCounts); | ||
// TODO: check if the strides need to be aligned to 4 bytes for AABBs | ||
for (const auto& geom : geoms) | ||
if (const auto aabbCount=*(pMaxPrimitiveCounts++); aabbCount) | ||
inputSize = core::roundUp(inputSize,sizeof(float))+aabbCount*geom.stride; | ||
} | ||
} | ||
else | ||
{ | ||
core::map<uint32_t,size_t> allocationsPerStride; | ||
const auto geoms = as->getTriangleGeometries(); | ||
if (patch.hostBuild) | ||
{ | ||
const std::span<const IGPUBottomLevelAccelerationStructure::Triangles<const IGPUBuffer>> cpuGeoms = { | ||
reinterpret_cast<const IGPUBottomLevelAccelerationStructure::Triangles<const IGPUBuffer>*>(geoms.data()),geoms.size() | ||
}; | ||
sizes = device->getAccelerationStructureBuildSizes(buildFlags,motionBlur,cpuGeoms,pMaxPrimitiveCounts); | ||
} | ||
else | ||
{ | ||
const std::span<const IGPUBottomLevelAccelerationStructure::Triangles<const ICPUBuffer>> cpuGeoms = { | ||
reinterpret_cast<const IGPUBottomLevelAccelerationStructure::Triangles<const ICPUBuffer>*>(geoms.data()),geoms.size() | ||
}; | ||
sizes = device->getAccelerationStructureBuildSizes(buildFlags,motionBlur,cpuGeoms,pMaxPrimitiveCounts); | ||
// TODO: check if the strides need to be aligned to 4 bytes | ||
for (const auto& geom : geoms) | ||
if (const auto triCount=*(pMaxPrimitiveCounts++); triCount) | ||
{ | ||
switch (geom.indexType) | ||
{ | ||
case E_INDEX_TYPE::EIT_16BIT: | ||
allocationsPerStride[sizeof(uint16_t)] += triCount*3; | ||
break; | ||
case E_INDEX_TYPE::EIT_32BIT: | ||
allocationsPerStride[sizeof(uint32_t)] += triCount*3; | ||
break; | ||
default: | ||
break; | ||
} | ||
size_t bytesPerVertex = geom.vertexStride; | ||
if (geom.vertexData[1]) | ||
bytesPerVertex += bytesPerVertex; | ||
allocationsPerStride[geom.vertexStride] += geom.maxVertex; | ||
} | ||
} | ||
for (const auto& entry : allocationsPerStride) | ||
inputSize = core::roundUp<size_t>(inputSize,entry.first)+entry.first*entry.second; | ||
} | ||
} | ||
} | ||
if (!sizes) | ||
continue; | ||
// this is where it gets a bit weird, we need to create a buffer to back the acceleration structure | ||
IGPUBuffer::SCreationParams params = {}; | ||
constexpr size_t MinASBufferAlignment = 256u; | ||
params.size = core::roundUp(sizes.accelerationStructureSize,MinASBufferAlignment); | ||
params.usage = IGPUBuffer::E_USAGE_FLAGS::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT|IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; | ||
// concurrent ownership if any | ||
const auto outIx = i+entry.second.firstCopyIx; | ||
const auto uniqueCopyGroupID = gpuObjUniqueCopyGroupIDs[outIx]; | ||
const auto queueFamilies = inputs.getSharedOwnershipQueueFamilies(uniqueCopyGroupID,as,patch); | ||
params.queueFamilyIndexCount = queueFamilies.size(); | ||
params.queueFamilyIndices = queueFamilies.data(); | ||
// we need to save the buffer in a side-channel for later | ||
auto& out = accelerationStructureParams[IsTLAS][baseOffset+entry.second.firstCopyIx+i]; | ||
out = { | ||
.storage = device->createBuffer(std::move(params)), | ||
.scratchSize = sizes.buildScratchSize, | ||
.motionBlur = motionBlur, | ||
.compactAfterBuild = patch.compactAfterBuild, | ||
.inputSize = inputSize | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this needs some love from me
// This gets deferred till AFTER the Buffer Memory Allocations and Binding for Acceleration Structures | ||
if constexpr (!std::is_same_v<AssetType,ICPUBottomLevelAccelerationStructure> && !std::is_same_v<AssetType,ICPUTopLevelAccelerationStructure>) | ||
dfsCache.for_each([&](const instance_t<AssetType>& instance, dfs_cache<AssetType>::created_t& created)->void | ||
{ | ||
auto& stagingCache = std::get<SReserveResult::staging_cache_t<AssetType>>(retval.m_stagingCaches); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
need to pack up the lambda and defer it
// Deal with Deferred Creation of Acceleration structures | ||
{ | ||
for (auto asLevel=0; asLevel<2; asLevel++) | ||
{ | ||
// each of these stages must have a barrier in between | ||
size_t scratchSizeFullParallelBuild = 0; | ||
size_t scratchSizeFullParallelCompact = 0; | ||
// we collect the stats AFTER making sure that the BLAS / TLAS can actually be created | ||
for (const auto& deferredParams : accelerationStructureParams[asLevel]) | ||
{ | ||
// buffer failed to create/allocate | ||
if (!deferredParams.storage.get()) | ||
continue; | ||
IGPUAccelerationStructure::SCreationParams baseParams; | ||
{ | ||
auto* buf = deferredParams.storage.get(); | ||
const auto bufSz = buf->getSize(); | ||
using create_f = IGPUAccelerationStructure::SCreationParams::FLAGS; | ||
baseParams = { | ||
.bufferRange = {.offset=0,.size=bufSz,.buffer=smart_refctd_ptr<IGPUBuffer>(buf)}, | ||
.flags = deferredParams.motionBlur ? create_f::MOTION_BIT:create_f::NONE | ||
}; | ||
} | ||
smart_refctd_ptr<IGPUAccelerationStructure> as; | ||
if (asLevel) | ||
{ | ||
as = device->createBottomLevelAccelerationStructure({baseParams,deferredParams.maxInstanceCount}); | ||
} | ||
else | ||
{ | ||
as = device->createTopLevelAccelerationStructure({baseParams,deferredParams.maxInstanceCount}); | ||
} | ||
// note that in order to compact an AS you need to allocate a buffer range whose size is known only after the build | ||
const auto buildSize = deferredParams.inputSize+deferredParams.scratchSize; | ||
// sizes for building 1-by-1 vs parallel, note that | ||
retval.m_minASBuildScratchSize = core::max(buildSize,retval.m_minASBuildScratchSize); | ||
scratchSizeFullParallelBuild += buildSize; | ||
if (deferredParams.compactAfterBuild) | ||
scratchSizeFullParallelCompact += deferredParams.scratchSize; | ||
// triangles, AABBs or Instance Transforms will need to be supplied from VRAM | ||
// TODO: also mark somehow that we'll need a BUILD INPUT READ ONLY BUFFER WITH XFER usage | ||
if (deferredParams.inputSize) | ||
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::TRANSFER_BIT; | ||
} | ||
// | ||
retval.m_maxASBuildScratchSize = core::max(core::max(scratchSizeFullParallelBuild,scratchSizeFullParallelCompact),retval.m_maxASBuildScratchSize); | ||
} | ||
// | ||
if (retval.m_minASBuildScratchSize) | ||
{ | ||
retval.m_queueFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT; | ||
retval.m_maxASBuildScratchSize = core::max(core::max(scratchSizeFullParallelBLASBuild,scratchSizeFullParallelBLASCompact),core::max(scratchSizeFullParallelTLASBuild,scratchSizeFullParallelTLASCompact)); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
needs some love from me
…ice and Host build requests separately
Also update comments about what ends up in `m_gpuObjects`
Description
Conversion of ICPU BLAS and TLAS to IGPU including building.
We may need to support a list of IGPUBLAS in IGPUTLAS for sanity/lifetime coupling, but only if update/rebuild is not allowed or something (need to make a separate issue out of it because I have no clue how that's gonna be structured).
Testing
TODO list: