Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/metal/bvh.mm
- This file was added.
| /* | |||||
| * Copyright 2021 Blender Foundation | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifdef WITH_METAL | |||||
| # include "scene/hair.h" | |||||
| # include "scene/mesh.h" | |||||
| # include "scene/object.h" | |||||
| # include "util/progress.h" | |||||
| # include "device/metal/bvh.h" | |||||
| CCL_NAMESPACE_BEGIN | |||||
/* Publish a printf-style formatted message as the progress sub-status.
 *
 * The macro expands in place and calls `set_substatus()` on a variable named `progress`, so
 * one must be in scope at every call site. The body is wrapped in `do { ... } while (0)` so
 * that an invocation plus its trailing semicolon forms exactly one statement and stays safe
 * inside an unbraced `if`/`else`. */
#  define BVH_status(...) \
    do { \
      string str = string_printf(__VA_ARGS__); \
      progress.set_substatus(str); \
    } while (0)
/* Construct a Metal BVH for the given build parameters, geometry and objects.
 *
 * Only forwards its arguments to the BVH base class and binds `stats` to the device's
 * statistics object (used by the mem_alloc/mem_free accounting elsewhere in this file).
 * No acceleration structure is created here. */
BVHMetal::BVHMetal(const BVHParams &params_,
                   const vector<Geometry *> &geometry_,
                   const vector<Object *> &objects_,
                   Device *device)
    : BVH(params_, geometry_, objects_), stats(device->stats)
{
}
/* Drop the acceleration structure held by this BVH (if there is one) and subtract its
 * allocated size from the memory statistics. */
BVHMetal::~BVHMetal()
{
  if (@available(macos 12.0, *)) {
    /* Nothing was ever built, or it has already been released. */
    if (!accel_struct) {
      return;
    }

    stats.mem_free(accel_struct.allocatedSize);
    [accel_struct release];
  }
}
/* Build (or refit) the bottom-level acceleration structure for a triangle mesh.
 *
 * Vertex positions (one copy per motion step) and triangle indices are uploaded into temporary
 * Metal buffers, the build is encoded on a command buffer from `queue` and committed without
 * waiting. `accel_struct_building` is set before the commit and cleared by the completion
 * handler once `accel_struct` has been assigned. For BVH_TYPE_STATIC a second command buffer
 * compacts the structure before it is stored.
 *
 * A refit is only encoded when a previous structure exists and the BVH is not static (static
 * structures are built without the refit usage flag); otherwise a full build is performed.
 * Whatever structure was previously stored in `accel_struct` is released, and removed from the
 * memory statistics, when the new one replaces it.
 *
 * Returns false if the mesh has no triangles or the macOS 12 API is unavailable, true once the
 * build has been submitted. */
bool BVHMetal::build_BLAS_mesh(Progress &progress,
                               id<MTLDevice> device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for triangle primitives */
    Mesh *const mesh = static_cast<Mesh *const>(geom);
    if (mesh->num_triangles() == 0) {
      return false;
    }

    /*------------------------------------------------*/
    BVH_status(
        "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
    /*------------------------------------------------*/

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    /* Refitting reads from the existing structure, so it needs one to exist and to have been
     * built with the refit usage flag (only set below for non-static BVHs). */
    const bool do_refit = refit && !use_fast_trace_bvh && (accel_struct != nil);

    const array<float3> &verts = mesh->get_verts();
    const array<int> &tris = mesh->get_triangles();
    const size_t num_verts = verts.size();
    const size_t num_indices = tris.size();

    size_t num_motion_steps = 1;
    Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
      num_motion_steps = mesh->get_motion_steps();
    }

    const MTLResourceOptions storage_mode = device.hasUnifiedMemory ?
                                                MTLResourceStorageModeShared :
                                                MTLResourceStorageModeManaged;

    /* Upload the mesh data to the GPU */
    id<MTLBuffer> posBuf = nil;
    id<MTLBuffer> indexBuf = [device newBufferWithBytes:tris.data()
                                                 length:num_indices * sizeof(tris.data()[0])
                                                options:storage_mode];

    if (num_motion_steps == 1) {
      posBuf = [device newBufferWithBytes:verts.data()
                                   length:num_verts * sizeof(verts.data()[0])
                                  options:storage_mode];
    }
    else {
      /* One full copy of the vertex positions per motion step, stored back to back. */
      posBuf = [device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
                                   options:storage_mode];
      float3 *dest_data = (float3 *)[posBuf contents];
      const size_t center_step = (num_motion_steps - 1) / 2;
      for (size_t step = 0; step < num_motion_steps; ++step) {
        const float3 *step_verts = verts.data();
        /* The center step for motion vertices is not stored in the attribute. */
        if (step != center_step) {
          const size_t attr_step = (step > center_step) ? step - 1 : step;
          step_verts = motion_keys->data_float3() + attr_step * num_verts;
        }
        memcpy(dest_data + num_verts * step, step_verts, num_verts * sizeof(float3));
      }
      if (storage_mode == MTLResourceStorageModeManaged) {
        [posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
      }
    }

    /* Create an acceleration structure. */
    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      std::vector<MTLMotionKeyframeData *> vertex_ptrs;
      vertex_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = posBuf;
        k.offset = num_verts * step * sizeof(float3);
        vertex_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
      geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
                                                         count:vertex_ptrs.size()];
      geomDescMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescMotion.indexBuffer = indexBuf;
      geomDescMotion.indexBufferOffset = 0;
      geomDescMotion.indexType = MTLIndexTypeUInt32;
      geomDescMotion.triangleCount = num_indices / 3;
      geomDescMotion.intersectionFunctionTableOffset = 0;

      geomDesc = geomDescMotion;
    }
    else {
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDescNoMotion.vertexBuffer = posBuf;
      geomDescNoMotion.vertexBufferOffset = 0;
      geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescNoMotion.indexBuffer = indexBuf;
      geomDescNoMotion.indexBufferOffset = 0;
      geomDescNoMotion.indexType = MTLIndexTypeUInt32;
      geomDescNoMotion.triangleCount = num_indices / 3;
      geomDescNoMotion.intersectionFunctionTableOffset = 0;

      geomDesc = geomDescNoMotion;
    }

    /* Force a single any-hit call, so shadow record-all behavior works correctly */
    /* (Match optix behaviour: unsigned int build_flags =
     * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
    geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];
    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionKeyframeCount = num_motion_steps;
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                   options:MTLResourceStorageModePrivate];
    /* Receives the compacted size (written as a 64-bit value) for the static path. */
    id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (do_refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];
    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
      /* free temp resources */
      [scratchBuf release];
      [indexBuf release];
      [posBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> compactCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> compactEnc =
              [compactCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [device
              newAccelerationStructureWithSize:compressed_size];
          [compactEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                  toAccelerationStructure:accel];
          [compactEnc endEncoding];
          [compactCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
            /* Swap in the compacted structure and retire whatever was stored before. */
            id<MTLAccelerationStructure> accel_previous = accel_struct;
            uint64_t allocated_size = [accel allocatedSize];
            stats.mem_alloc(allocated_size);
            accel_struct = accel;
            if (accel_previous) {
              stats.mem_free(accel_previous.allocatedSize);
              [accel_previous release];
            }
            [accel_uncompressed release];
            accel_struct_building = false;
          }];
          [compactCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        id<MTLAccelerationStructure> accel_previous = accel_struct;
        accel_struct = accel_uncompressed;

        uint64_t allocated_size = [accel_struct allocatedSize];
        stats.mem_alloc(allocated_size);

        /* After a refit the source structure is no longer needed; without this it would leak
         * and stay counted in the memory statistics. */
        if (accel_previous) {
          stats.mem_free(accel_previous.allocatedSize);
          [accel_previous release];
        }
        accel_struct_building = false;
      }
      [sizeBuf release];
    }];

    accel_struct_building = true;
    [accelCommands commit];

    return true;
  }
  return false;
}
/* Build (or refit) the bottom-level acceleration structure for hair curves.
 *
 * Every curve segment is represented by one axis-aligned bounding box per motion step. The
 * boxes are written into a temporary Metal buffer, the build is encoded on a command buffer
 * from `queue` and committed without waiting. `accel_struct_building` is set before the commit
 * and cleared by the completion handler once `accel_struct` has been assigned. For
 * BVH_TYPE_STATIC a second command buffer compacts the structure before it is stored.
 *
 * A refit is only encoded when a previous structure exists and the BVH is not static (static
 * structures are built without the refit usage flag); otherwise a full build is performed.
 * Whatever structure was previously stored in `accel_struct` is released, and removed from the
 * memory statistics, when the new one replaces it.
 *
 * Returns false if there are no curves or the macOS 12 API is unavailable, true once the
 * build has been submitted. */
bool BVHMetal::build_BLAS_hair(Progress &progress,
                               id<MTLDevice> device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for hair curves */
    Hair *hair = static_cast<Hair *>(geom);
    if (hair->num_curves() == 0) {
      return false;
    }

    /*------------------------------------------------*/
    BVH_status(
        "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
    /*------------------------------------------------*/

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    /* Refitting reads from the existing structure, so it needs one to exist and to have been
     * built with the refit usage flag (only set below for non-static BVHs). */
    const bool do_refit = refit && !use_fast_trace_bvh && (accel_struct != nil);

    const size_t num_segments = hair->num_segments();

    size_t num_motion_steps = 1;
    Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
      num_motion_steps = hair->get_motion_steps();
    }

    const size_t num_aabbs = num_segments * num_motion_steps;

    const MTLResourceOptions storage_mode = device.hasUnifiedMemory ?
                                                MTLResourceStorageModeShared :
                                                MTLResourceStorageModeManaged;

    /* Allocate a GPU buffer for the AABB data and populate it */
    id<MTLBuffer> aabbBuf = [device
        newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
                    options:storage_mode];
    MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];

    /* Get AABBs for each motion step */
    const size_t center_step = (num_motion_steps - 1) / 2;
    const size_t num_curves = hair->num_curves();
    const size_t num_curve_keys = hair->get_curve_keys().size();
    auto curve_radius = hair->get_curve_radius().data();
    for (size_t step = 0; step < num_motion_steps; ++step) {
      /* The center step for motion vertices is not stored in the attribute */
      const float3 *keys = hair->get_curve_keys().data();
      if (step != center_step) {
        size_t attr_offset = (step > center_step) ? step - 1 : step;
        /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4) */
        keys = motion_keys->data_float3() + attr_offset * num_curve_keys;
      }

      /* `i` is the running segment index across all curves within this motion step. */
      for (size_t j = 0, i = 0; j < num_curves; ++j) {
        const Hair::Curve curve = hair->get_curve(j);

        for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
          BoundBox bounds = BoundBox::empty;
          curve.bounds_grow(segment, keys, curve_radius, bounds);

          const size_t index = step * num_segments + i;
          aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
          aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
        }
      }
    }

    if (storage_mode == MTLResourceStorageModeManaged) {
      [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
    }

#  if 0
    for (size_t i=0; i<num_aabbs && i < 400; i++) {
      MTLAxisAlignedBoundingBox& bb = aabb_data[i];
      printf("  %d:   %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
    }
#  endif

    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (motion_blur) {
      /* One keyframe per motion step, each pointing at that step's slice of the AABB buffer. */
      std::vector<MTLMotionKeyframeData *> aabb_ptrs;
      aabb_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = aabbBuf;
        k.offset = step * num_segments * sizeof(MTLAxisAlignedBoundingBox);
        aabb_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
      geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
                                                              count:aabb_ptrs.size()];
      geomDescMotion.boundingBoxCount = num_segments;
      geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
      geomDescMotion.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behaviour: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescMotion.opaque = true;
      geomDesc = geomDescMotion;
    }
    else {
      MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
      geomDescNoMotion.boundingBoxBuffer = aabbBuf;
      geomDescNoMotion.boundingBoxBufferOffset = 0;
      geomDescNoMotion.boundingBoxCount = int(num_aabbs);
      geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
      geomDescNoMotion.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behaviour: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescNoMotion.opaque = true;
      geomDesc = geomDescNoMotion;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];

    if (motion_blur) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionKeyframeCount = num_motion_steps;
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                   options:MTLResourceStorageModePrivate];
    /* Receives the compacted size (written as a 64-bit value) for the static path. */
    id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (do_refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];
    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
      /* free temp resources */
      [scratchBuf release];
      [aabbBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> compactCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> compactEnc =
              [compactCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [device
              newAccelerationStructureWithSize:compressed_size];
          [compactEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                  toAccelerationStructure:accel];
          [compactEnc endEncoding];
          [compactCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
            /* Swap in the compacted structure and retire whatever was stored before. */
            id<MTLAccelerationStructure> accel_previous = accel_struct;
            uint64_t allocated_size = [accel allocatedSize];
            stats.mem_alloc(allocated_size);
            accel_struct = accel;
            if (accel_previous) {
              stats.mem_free(accel_previous.allocatedSize);
              [accel_previous release];
            }
            [accel_uncompressed release];
            accel_struct_building = false;
          }];
          [compactCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        id<MTLAccelerationStructure> accel_previous = accel_struct;
        accel_struct = accel_uncompressed;

        uint64_t allocated_size = [accel_struct allocatedSize];
        stats.mem_alloc(allocated_size);

        /* After a refit the source structure is no longer needed; without this it would leak
         * and stay counted in the memory statistics. */
        if (accel_previous) {
          stats.mem_free(accel_previous.allocatedSize);
          [accel_previous release];
        }
        accel_struct_building = false;
      }
      [sizeBuf release];
    }];

    accel_struct_building = true;
    [accelCommands commit];
    return true;
  }
  return false;
}
/* Dispatch the bottom-level build to the builder matching the geometry type.
 *
 * A BLAS wraps exactly one geometry (asserted). Meshes and volumes go through the triangle
 * builder, hair through the curve builder; any other type, or a system without the macOS 12
 * API, yields false. */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  if (@available(macos 12.0, *)) {
    assert(objects.size() == 1 && geometry.size() == 1);

    /* Build bottom level acceleration structures (BLAS) */
    Geometry *const target = geometry[0];
    const auto kind = target->geometry_type;

    if (kind == Geometry::VOLUME || kind == Geometry::MESH) {
      return build_BLAS_mesh(progress, device, queue, target, refit);
    }
    if (kind == Geometry::HAIR) {
      return build_BLAS_hair(progress, device, queue, target, refit);
    }
    return false;
  }
  return false;
}
/* Build (or refit) the top-level acceleration structure that instances every traceable
 * object's BLAS.
 *
 * The function first blocks until no referenced BLAS reports `accel_struct_building`, then
 * fills an instance descriptor buffer (plus a motion transform buffer when `motion_blur` is
 * set), encodes the build on `queue` and waits for it to complete. On success `accel_struct`
 * holds the new structure and `blas_array` lists the BLAS handles in instance-index order.
 *
 * A refit is only encoded when a previous structure exists and the BVH is not static (static
 * structures are built without the refit usage flag); otherwise a full build is performed.
 * Whatever structure was previously stored in `accel_struct` is released, and removed from the
 * memory statistics, once the new one is ready.
 *
 * Returns true on success, false if the macOS 12 API is unavailable. */
bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  if (@available(macos 12.0, *)) {

    /* we need to sync here and ensure that all BLAS have completed async generation by both GCD
     * and Metal */
    {
      __block bool complete_bvh = false;
      while (!complete_bvh) {
        dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          complete_bvh = true;
          for (Object *ob : objects) {
            /* Skip non-traceable objects */
            if (!ob->is_traceable())
              continue;

            Geometry const *geom = ob->get_geometry();
            BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
            if (blas->accel_struct_building) {
              complete_bvh = false;

              /* We're likely waiting on a command buffer that's in flight to complete.
               * Queue up a command buffer and wait for it complete before checking the BLAS again
               */
              id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
              [command_buffer commit];
              [command_buffer waitUntilCompleted];
              break;
            }
          }
        });
      }
    }

    /* Count the instances and the motion transforms they need (at least one per instance). */
    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      if (!ob->is_traceable())
        continue;
      num_instances++;

      if (ob->use_motion()) {
        num_motion_transforms += max(1, ob->get_motion().size());
      }
      else {
        num_motion_transforms++;
      }
    }

    /*------------------------------------------------*/
    BVH_status("Building TLAS      | %7d instances", (int)num_instances);
    /*------------------------------------------------*/

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    /* Refitting reads from the existing structure, so it needs one to exist and to have been
     * built with the refit usage flag (only set below for non-static BVHs). */
    const bool do_refit = refit && !use_fast_trace_bvh && (accel_struct != nil);

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<BVHMetal const *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping */
    auto get_blas_index = [&](BVHMetal const *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      else {
        int blas_index = (int)[all_blas count];
        instance_mapping[blas] = blas_index;
        if (@available(macos 12.0, *)) {
          [all_blas addObject:blas->accel_struct];
        }
        return blas_index;
      }
    };

    /* Write the first 12 floats of `src`, read as 3 rows of 4, into `dst` as 4 columns of 3
     * (i.e. transposed). */
    auto store_transposed = [](float *dst, float const *src) {
      for (int k = 0; k < 12; k++) {
        dst[k] = src[(k / 3) + 4 * (k % 3)];
      }
    };

    /* Set the diagonal of a 4x3 column-major matrix to one. The other entries are not written;
     * this relies on them already being zero (Metal documents newBufferWithLength:options: as
     * returning zero-filled storage). */
    auto store_identity = [](float *dst) { dst[0] = dst[4] = dst[8] = 1.0f; };

    const MTLResourceOptions storage_mode = device.hasUnifiedMemory ?
                                                MTLResourceStorageModeShared :
                                                MTLResourceStorageModeManaged;

    const size_t instance_size = motion_blur ?
                                     sizeof(MTLAccelerationStructureMotionInstanceDescriptor) :
                                     sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size
                                                    options:storage_mode];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *motion_transforms = nullptr;
    if (motion_blur && num_motion_transforms) {
      motion_transforms_buf = [device
          newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                      options:storage_mode];
      motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
    }

    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;
    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      if (!ob->is_traceable())
        continue;

      Geometry const *geom = ob->get_geometry();

      BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
      uint32_t accel_struct_index = get_blas_index(blas);

      /* Add some of the object visibility bits to the mask.
       * __prim_visibility contains the combined visibility bits of all instances, so is not
       * reliable if they differ between instances.
       *
       * METAL_WIP: OptiX visibility mask can only contain 8 bits, so have to trade-off here
       * and select just a few important ones.
       */
      uint32_t mask = ob->visibility_for_tracing() & 0xFF;

      /* Have to have at least one bit in the mask, or else instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* Set user instance ID to object index */
      int object_index = ob->get_device_index();
      uint32_t user_id = uint32_t(object_index);

      /* Bake into the appropriate descriptor */
      if (motion_blur) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = user_id;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        int key_count = ob->get_motion().size();
        if (key_count) {
          /* One transposed transform per motion key. */
          desc.motionTransformsCount = key_count;

          Transform *keys = ob->get_motion().data();
          for (int key = 0; key < key_count; key++) {
            float *t = (float *)&motion_transforms[motion_transform_index++];
            store_transposed(t, (float const *)&keys[key]);
          }
        }
        else {
          /* No motion keys: emit a single static transform. */
          desc.motionTransformsCount = 1;

          float *t = (float *)&motion_transforms[motion_transform_index++];
          if (ob->get_geometry()->is_instanced()) {
            /* Transpose transform */
            store_transposed(t, (float const *)&ob->get_tfm());
          }
          else {
            /* Clear transform to identity matrix */
            store_identity(t);
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = user_id;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          /* Transpose transform */
          store_transposed(t, (float const *)&ob->get_tfm());
        }
        else {
          /* Clear transform to identity matrix */
          store_identity(t);
        }
      }
    }

    /* Every slot counted in the first pass must have been written, whatever the storage mode. */
    assert(!motion_transforms_buf || num_motion_transforms == motion_transform_index);

    if (storage_mode == MTLResourceStorageModeManaged) {
      [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
      if (motion_transforms_buf) {
        [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
      }
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (motion_blur) {
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                   options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (do_refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    [accelCommands waitUntilCompleted];

    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    uint64_t allocated_size = [accel allocatedSize];
    stats.mem_alloc(allocated_size);

    /* Cache top and bottom-level acceleration structs. The GPU work has completed, so the
     * structure being replaced (the refit source, if any) can be retired now; without this it
     * would leak and stay counted in the memory statistics. */
    id<MTLAccelerationStructure> accel_previous = accel_struct;
    accel_struct = accel;
    if (accel_previous) {
      stats.mem_free(accel_previous.allocatedSize);
      [accel_previous release];
    }

    blas_array.clear();
    blas_array.reserve(all_blas.count);
    for (id<MTLAccelerationStructure> blas in all_blas) {
      blas_array.push_back(blas);
    }

    return true;
  }
  return false;
}
/* Entry point: build or refit this BVH as a top-level or bottom-level structure, depending on
 * `params.top_level`.
 *
 * A refit is only honoured for non-static BVHs that already own an acceleration structure.
 * In every other case the request is downgraded to a full build: the existing structure (if
 * any) is released and removed from the memory statistics, and `refit` is cleared so that the
 * BLAS/TLAS builders do not try to refit from a structure that no longer exists. */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    if (refit) {
      if (params.bvh_type == BVH_TYPE_STATIC) {
        /* Static structures are rebuilt from scratch rather than refitted. */
        refit = false;
      }
      else {
        /* Refitting a BVH that was never built is a caller error: assert in development
         * builds, fall back to a full build otherwise. */
        assert(accel_struct);
        if (!accel_struct) {
          refit = false;
        }
      }
    }

    if (!refit && accel_struct) {
      stats.mem_free(accel_struct.allocatedSize);
      [accel_struct release];
      accel_struct = nil;
    }
  }

  if (!params.top_level) {
    return build_BLAS(progress, device, queue, refit);
  }
  else {
    return build_TLAS(progress, device, queue, refit);
  }
}
| CCL_NAMESPACE_END | |||||
| #endif /* WITH_METAL */ | |||||