Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/optix/queue.cpp
| Show All 18 Lines | |||||
| { | { | ||||
| } | } | ||||
| void OptiXDeviceQueue::init_execution() | void OptiXDeviceQueue::init_execution() | ||||
| { | { | ||||
| CUDADeviceQueue::init_execution(); | CUDADeviceQueue::init_execution(); | ||||
| } | } | ||||
| static bool is_optix_specific_kernel(DeviceKernel kernel) | static bool is_optix_specific_kernel(DeviceKernel kernel, bool use_osl) | ||||
| { | { | ||||
| return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || | # ifdef WITH_OSL | ||||
| kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE || | /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */ | ||||
| kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || | if (use_osl && device_kernel_has_shading(kernel)) { | ||||
brecht: We should add utility functions like `device_kernel_has_shading` and… | |||||
| kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || | return true; | ||||
| kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || | } | ||||
| kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); | # else | ||||
| (void)use_osl; | |||||
| # endif | |||||
| return device_kernel_has_intersection(kernel); | |||||
| } | } | ||||
| bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, | bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, | ||||
| const int work_size, | const int work_size, | ||||
| DeviceKernelArguments const &args) | DeviceKernelArguments const &args) | ||||
| { | { | ||||
| if (!is_optix_specific_kernel(kernel)) { | OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_); | ||||
| # ifdef WITH_OSL | |||||
| const bool use_osl = static_cast<OSLGlobals *>(optix_device->get_cpu_osl_memory())->use; | |||||
| # else | |||||
| const bool use_osl = false; | |||||
| # endif | |||||
| if (!is_optix_specific_kernel(kernel, use_osl)) { | |||||
| return CUDADeviceQueue::enqueue(kernel, work_size, args); | return CUDADeviceQueue::enqueue(kernel, work_size, args); | ||||
| } | } | ||||
| if (cuda_device_->have_error()) { | if (cuda_device_->have_error()) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| debug_enqueue_begin(kernel, work_size); | debug_enqueue_begin(kernel, work_size); | ||||
| const CUDAContextScope scope(cuda_device_); | const CUDAContextScope scope(cuda_device_); | ||||
| OptiXDevice *const optix_device = static_cast<OptiXDevice *>(cuda_device_); | |||||
| const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer; | const device_ptr sbt_data_ptr = optix_device->sbt_data.device_pointer; | ||||
| const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer; | const device_ptr launch_params_ptr = optix_device->launch_params.device_pointer; | ||||
| cuda_device_assert( | cuda_device_assert( | ||||
| cuda_device_, | cuda_device_, | ||||
| cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array), | cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, path_index_array), | ||||
| args.values[0], // &d_path_index | args.values[0], // &d_path_index | ||||
| sizeof(device_ptr), | sizeof(device_ptr), | ||||
| cuda_stream_)); | cuda_stream_)); | ||||
| if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || | if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || device_kernel_has_shading(kernel)) { | ||||
| kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || | |||||
| kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) { | |||||
| cuda_device_assert( | cuda_device_assert( | ||||
| cuda_device_, | cuda_device_, | ||||
| cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer), | cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer), | ||||
| args.values[1], // &d_render_buffer | args.values[1], // &d_render_buffer | ||||
| sizeof(device_ptr), | sizeof(device_ptr), | ||||
| cuda_stream_)); | cuda_stream_)); | ||||
| } | } | ||||
| if (kernel == DEVICE_KERNEL_SHADER_EVAL_DISPLACE || | |||||
| kernel == DEVICE_KERNEL_SHADER_EVAL_BACKGROUND || | |||||
| kernel == DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY) { | |||||
| cuda_device_assert(cuda_device_, | |||||
| cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, offset), | |||||
| args.values[2], // &d_offset | |||||
| sizeof(int32_t), | |||||
| cuda_stream_)); | |||||
| } | |||||
| cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_)); | cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_)); | ||||
| OptixPipeline pipeline = nullptr; | OptixPipeline pipeline = nullptr; | ||||
| OptixShaderBindingTable sbt_params = {}; | OptixShaderBindingTable sbt_params = {}; | ||||
| switch (kernel) { | switch (kernel) { | ||||
| case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_BACKGROUND * sizeof(SbtRecord); | |||||
| break; | |||||
| case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_LIGHT * sizeof(SbtRecord); | |||||
| break; | |||||
| case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE * sizeof(SbtRecord); | |||||
| break; | |||||
| case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: | case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: | ||||
| pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE]; | pipeline = optix_device->pipelines[PIP_SHADE]; | ||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord); | sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord); | ||||
| break; | break; | ||||
| case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: | case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: | ||||
| pipeline = optix_device->pipelines[PIP_SHADE_MNEE]; | pipeline = optix_device->pipelines[PIP_SHADE]; | ||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord); | sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord); | ||||
| break; | break; | ||||
| case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_VOLUME * sizeof(SbtRecord); | |||||
| break; | |||||
| case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SHADOW * sizeof(SbtRecord); | |||||
| break; | |||||
| case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: | case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: | ||||
| pipeline = optix_device->pipelines[PIP_INTERSECT]; | pipeline = optix_device->pipelines[PIP_INTERSECT]; | ||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord); | sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord); | ||||
| break; | break; | ||||
| case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: | case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: | ||||
| pipeline = optix_device->pipelines[PIP_INTERSECT]; | pipeline = optix_device->pipelines[PIP_INTERSECT]; | ||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SHADOW * sizeof(SbtRecord); | sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SHADOW * sizeof(SbtRecord); | ||||
| break; | break; | ||||
| case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: | case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: | ||||
| pipeline = optix_device->pipelines[PIP_INTERSECT]; | pipeline = optix_device->pipelines[PIP_INTERSECT]; | ||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SUBSURFACE * sizeof(SbtRecord); | sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_SUBSURFACE * sizeof(SbtRecord); | ||||
| break; | break; | ||||
| case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: | case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: | ||||
| pipeline = optix_device->pipelines[PIP_INTERSECT]; | pipeline = optix_device->pipelines[PIP_INTERSECT]; | ||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord); | sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_VOLUME_STACK * sizeof(SbtRecord); | ||||
| break; | break; | ||||
| case DEVICE_KERNEL_SHADER_EVAL_DISPLACE: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_DISPLACE * sizeof(SbtRecord); | |||||
| break; | |||||
| case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_EVAL_BACKGROUND * sizeof(SbtRecord); | |||||
| break; | |||||
| case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: | |||||
| pipeline = optix_device->pipelines[PIP_SHADE]; | |||||
| sbt_params.raygenRecord = sbt_data_ptr + | |||||
| PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY * sizeof(SbtRecord); | |||||
| break; | |||||
| default: | default: | ||||
| LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel) | LOG(ERROR) << "Invalid kernel " << device_kernel_as_string(kernel) | ||||
| << " is attempted to be enqueued."; | << " is attempted to be enqueued."; | ||||
| return false; | return false; | ||||
| } | } | ||||
| sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord); | sbt_params.missRecordBase = sbt_data_ptr + MISS_PROGRAM_GROUP_OFFSET * sizeof(SbtRecord); | ||||
| sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); | sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); | ||||
| sbt_params.missRecordCount = NUM_MIS_PROGRAM_GROUPS; | sbt_params.missRecordCount = NUM_MISS_PROGRAM_GROUPS; | ||||
| sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord); | sbt_params.hitgroupRecordBase = sbt_data_ptr + HIT_PROGAM_GROUP_OFFSET * sizeof(SbtRecord); | ||||
| sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); | sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); | ||||
| sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS; | sbt_params.hitgroupRecordCount = NUM_HIT_PROGRAM_GROUPS; | ||||
| sbt_params.callablesRecordBase = sbt_data_ptr + CALLABLE_PROGRAM_GROUPS_BASE * sizeof(SbtRecord); | sbt_params.callablesRecordBase = sbt_data_ptr + CALLABLE_PROGRAM_GROUPS_BASE * sizeof(SbtRecord); | ||||
| sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS; | sbt_params.callablesRecordCount = NUM_CALLABLE_PROGRAM_GROUPS; | ||||
| sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord); | sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord); | ||||
| # ifdef WITH_OSL | |||||
| if (use_osl) { | |||||
| sbt_params.callablesRecordCount += static_cast<unsigned int>(optix_device->osl_groups.size()); | |||||
| } | |||||
| # endif | |||||
| /* Launch the ray generation program. */ | /* Launch the ray generation program. */ | ||||
| optix_device_assert(optix_device, | optix_device_assert(optix_device, | ||||
| optixLaunch(pipeline, | optixLaunch(pipeline, | ||||
| cuda_stream_, | cuda_stream_, | ||||
| launch_params_ptr, | launch_params_ptr, | ||||
| optix_device->launch_params.data_elements, | optix_device->launch_params.data_elements, | ||||
| &sbt_params, | &sbt_params, | ||||
| work_size, | work_size, | ||||
| Show All 11 Lines | |||||
We should add utility functions like `device_kernel_has_shading` and `device_kernel_has_intersection` in `device/kernel.h` to deduplicate these kinds of checks.
Ideally, things like the construction of `group_descs` and the `PG_RGEN` indexes could be done without every device backend hardcoding all the kernel names, but that's probably beyond the scope of this patch.