Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/optix/device_impl.cpp
| Show All 40 Lines | |||||
| # define __KERNEL_OPTIX__ | # define __KERNEL_OPTIX__ | ||||
| # include "kernel/device/optix/globals.h" | # include "kernel/device/optix/globals.h" | ||||
| # include <optix_denoiser_tiling.h> | # include <optix_denoiser_tiling.h> | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
| OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) | OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) | ||||
| : device(device), queue(device), state(device, "__denoiser_state") | : device(device), queue(device), state(device, "__denoiser_state", true) | ||||
| { | { | ||||
| } | } | ||||
| OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) | OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) | ||||
| : CUDADevice(info, stats, profiler), | : CUDADevice(info, stats, profiler), | ||||
| sbt_data(this, "__sbt", MEM_READ_ONLY), | sbt_data(this, "__sbt", MEM_READ_ONLY), | ||||
| launch_params(this, "__params"), | launch_params(this, "__params", false), | ||||
| denoiser_(this) | denoiser_(this) | ||||
| { | { | ||||
| /* Make the CUDA context current. */ | /* Make the CUDA context current. */ | ||||
| if (!cuContext) { | if (!cuContext) { | ||||
| /* Do not initialize if CUDA context creation failed already. */ | /* Do not initialize if CUDA context creation failed already. */ | ||||
| return; | return; | ||||
| } | } | ||||
| const CUDAContextScope scope(this); | const CUDAContextScope scope(this); | ||||
| ▲ Show 20 Lines • Show All 453 Lines • ▼ Show 20 Lines | |||||
| */ | */ | ||||
| class OptiXDevice::DenoiseContext { | class OptiXDevice::DenoiseContext { | ||||
| public: | public: | ||||
| explicit DenoiseContext(OptiXDevice *device, const DeviceDenoiseTask &task) | explicit DenoiseContext(OptiXDevice *device, const DeviceDenoiseTask &task) | ||||
| : denoise_params(task.params), | : denoise_params(task.params), | ||||
| render_buffers(task.render_buffers), | render_buffers(task.render_buffers), | ||||
| buffer_params(task.buffer_params), | buffer_params(task.buffer_params), | ||||
| guiding_buffer(device, "denoiser guiding passes buffer"), | guiding_buffer(device, "denoiser guiding passes buffer", true), | ||||
| num_samples(task.num_samples) | num_samples(task.num_samples) | ||||
| { | { | ||||
| num_input_passes = 1; | num_input_passes = 1; | ||||
| if (denoise_params.use_pass_albedo) { | if (denoise_params.use_pass_albedo) { | ||||
| num_input_passes += 1; | num_input_passes += 1; | ||||
| use_pass_albedo = true; | use_pass_albedo = true; | ||||
| pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO); | pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO); | ||||
| if (denoise_params.use_pass_normal) { | if (denoise_params.use_pass_normal) { | ||||
| ▲ Show 20 Lines • Show All 475 Lines • ▼ Show 20 Lines | bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass) | ||||
| return true; | return true; | ||||
| } | } | ||||
| bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, | bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, | ||||
| OptixBuildOperation operation, | OptixBuildOperation operation, | ||||
| const OptixBuildInput &build_input, | const OptixBuildInput &build_input, | ||||
| uint16_t num_motion_steps) | uint16_t num_motion_steps) | ||||
| { | { | ||||
| /* Allocate and build acceleration structures only one at a time, to prevent parallel builds | |||||
| * from running out of memory (since both original and compacted acceleration structure memory | |||||
| * may be allocated at the same time for the duration of this function). The builds would | |||||
| * otherwise happen on the same CUDA stream anyway. */ | |||||
| static thread_mutex mutex; | |||||
| thread_scoped_lock lock(mutex); | |||||
| const CUDAContextScope scope(this); | const CUDAContextScope scope(this); | ||||
| const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC); | const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC); | ||||
| /* Compute memory usage. */ | /* Compute memory usage. */ | ||||
| OptixAccelBufferSizes sizes = {}; | OptixAccelBufferSizes sizes = {}; | ||||
| OptixAccelBuildOptions options = {}; | OptixAccelBuildOptions options = {}; | ||||
| options.operation = operation; | options.operation = operation; | ||||
| Show All 9 Lines | bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, | ||||
| options.motionOptions.numKeys = num_motion_steps; | options.motionOptions.numKeys = num_motion_steps; | ||||
| options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH; | options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH; | ||||
| options.motionOptions.timeBegin = 0.0f; | options.motionOptions.timeBegin = 0.0f; | ||||
| options.motionOptions.timeEnd = 1.0f; | options.motionOptions.timeEnd = 1.0f; | ||||
| optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes)); | optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes)); | ||||
| /* Allocate required output buffers. */ | /* Allocate required output buffers. */ | ||||
| device_only_memory<char> temp_mem(this, "optix temp as build mem"); | device_only_memory<char> temp_mem(this, "optix temp as build mem", true); | ||||
| temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8); | temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8); | ||||
| if (!temp_mem.device_pointer) { | if (!temp_mem.device_pointer) { | ||||
| /* Make sure temporary memory allocation succeeded. */ | /* Make sure temporary memory allocation succeeded. */ | ||||
| return false; | return false; | ||||
| } | } | ||||
| /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */ | |||||
| device_only_memory<char> &out_data = *bvh->as_data; | device_only_memory<char> &out_data = *bvh->as_data; | ||||
| if (operation == OPTIX_BUILD_OPERATION_BUILD) { | if (operation == OPTIX_BUILD_OPERATION_BUILD) { | ||||
| assert(out_data.device == this); | assert(out_data.device == this); | ||||
| out_data.alloc_to_device(sizes.outputSizeInBytes); | out_data.alloc_to_device(sizes.outputSizeInBytes); | ||||
| if (!out_data.device_pointer) { | if (!out_data.device_pointer) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| } | } | ||||
| Show All 32 Lines | if (use_fast_trace_bvh) { | ||||
| uint64_t compacted_size = sizes.outputSizeInBytes; | uint64_t compacted_size = sizes.outputSizeInBytes; | ||||
| cuda_assert(cuMemcpyDtoH(&compacted_size, compacted_size_prop.result, sizeof(compacted_size))); | cuda_assert(cuMemcpyDtoH(&compacted_size, compacted_size_prop.result, sizeof(compacted_size))); | ||||
| /* Temporary memory is no longer needed, so free it now to make space. */ | /* Temporary memory is no longer needed, so free it now to make space. */ | ||||
| temp_mem.free(); | temp_mem.free(); | ||||
| /* There is no point compacting if the size does not change. */ | /* There is no point compacting if the size does not change. */ | ||||
| if (compacted_size < sizes.outputSizeInBytes) { | if (compacted_size < sizes.outputSizeInBytes) { | ||||
| device_only_memory<char> compacted_data(this, "optix compacted as"); | device_only_memory<char> compacted_data(this, "optix compacted as", false); | ||||
| compacted_data.alloc_to_device(compacted_size); | compacted_data.alloc_to_device(compacted_size); | ||||
| if (!compacted_data.device_pointer) | if (!compacted_data.device_pointer) { | ||||
| /* Do not compact if memory allocation for compacted acceleration structure fails. | /* Do not compact if memory allocation for compacted acceleration structure fails. | ||||
| * Can just use the uncompacted one then, so succeed here regardless. */ | * Can just use the uncompacted one then, so succeed here regardless. */ | ||||
| return !have_error(); | return !have_error(); | ||||
| } | |||||
| optix_assert(optixAccelCompact( | optix_assert(optixAccelCompact( | ||||
| context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle)); | context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle)); | ||||
| bvh->traversable_handle = static_cast<uint64_t>(out_handle); | bvh->traversable_handle = static_cast<uint64_t>(out_handle); | ||||
| /* Wait for compaction to finish. */ | /* Wait for compaction to finish. */ | ||||
| cuda_assert(cuStreamSynchronize(NULL)); | cuda_assert(cuStreamSynchronize(NULL)); | ||||
| std::swap(out_data.device_size, compacted_data.device_size); | std::swap(out_data.device_size, compacted_data.device_size); | ||||
| std::swap(out_data.device_pointer, compacted_data.device_pointer); | std::swap(out_data.device_pointer, compacted_data.device_pointer); | ||||
| /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope. | |||||
| */ | |||||
| } | } | ||||
| } | } | ||||
| return !have_error(); | return !have_error(); | ||||
| } | } | ||||
| void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) | void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) | ||||
| { | { | ||||
| ▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines | # if OPTIX_ABI_VERSION >= 55 | ||||
| v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]); | v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]); | ||||
| # else | # else | ||||
| const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); | const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); | ||||
| const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); | const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); | ||||
| const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); | const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); | ||||
| const float4 pw = make_float4( | const float4 pw = make_float4( | ||||
| curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]); | curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]); | ||||
| /* Convert Catmull-Rom data to Bezier spline. */ | /* Convert Catmull-Rom data to B-spline. */ | ||||
| static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; | static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; | ||||
| static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; | static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; | ||||
| static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; | static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; | ||||
| static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f; | static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f; | ||||
| v[0] = make_float4( | v[0] = make_float4( | ||||
| dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw)); | dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw)); | ||||
| v[1] = make_float4( | v[1] = make_float4( | ||||
| ▲ Show 20 Lines • Show All 395 Lines • Show Last 20 Lines | |||||