Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_optix.cpp
| Show First 20 Lines • Show All 125 Lines • ▼ Show 20 Lines | |||||
| class OptiXDevice : public CUDADevice { | class OptiXDevice : public CUDADevice { | ||||
| // List of OptiX program groups | // List of OptiX program groups | ||||
| enum { | enum { | ||||
| PG_RGEN, | PG_RGEN, | ||||
| PG_MISS, | PG_MISS, | ||||
| PG_HITD, // Default hit group | PG_HITD, // Default hit group | ||||
| PG_HITL, // __BVH_LOCAL__ hit group | |||||
| PG_HITS, // __SHADOW_RECORD_ALL__ hit group | PG_HITS, // __SHADOW_RECORD_ALL__ hit group | ||||
| PG_HITL, // __BVH_LOCAL__ hit group (only used for triangles) | |||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| PG_HITD_MOTION, | |||||
| PG_HITS_MOTION, | |||||
| # endif | |||||
| # ifdef WITH_CYCLES_DEBUG | # ifdef WITH_CYCLES_DEBUG | ||||
| PG_EXCP, | PG_EXCP, | ||||
| # endif | # endif | ||||
| PG_BAKE, // kernel_bake_evaluate | PG_BAKE, // kernel_bake_evaluate | ||||
| PG_DISP, // kernel_displace_evaluate | PG_DISP, // kernel_displace_evaluate | ||||
| PG_BACK, // kernel_background_evaluate | PG_BACK, // kernel_background_evaluate | ||||
| NUM_PROGRAM_GROUPS | NUM_PROGRAM_GROUPS | ||||
| }; | }; | ||||
| Show All 28 Lines | # endif | ||||
| // Use a pool with multiple threads to support launches with multiple CUDA streams | // Use a pool with multiple threads to support launches with multiple CUDA streams | ||||
| TaskPool task_pool; | TaskPool task_pool; | ||||
| vector<CUstream> cuda_stream; | vector<CUstream> cuda_stream; | ||||
| OptixDeviceContext context = NULL; | OptixDeviceContext context = NULL; | ||||
| OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module | OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module | ||||
| OptixModule builtin_modules[2] = {}; | |||||
| OptixPipeline pipelines[NUM_PIPELINES] = {}; | OptixPipeline pipelines[NUM_PIPELINES] = {}; | ||||
| bool motion_blur = false; | bool motion_blur = false; | ||||
| device_vector<SbtRecord> sbt_data; | device_vector<SbtRecord> sbt_data; | ||||
| device_only_memory<KernelParams> launch_params; | device_only_memory<KernelParams> launch_params; | ||||
| vector<CUdeviceptr> as_mem; | vector<CUdeviceptr> as_mem; | ||||
| OptixTraversableHandle tlas_handle = 0; | OptixTraversableHandle tlas_handle = 0; | ||||
| ▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | ~OptiXDevice() | ||||
| sbt_data.free(); | sbt_data.free(); | ||||
| texture_info.free(); | texture_info.free(); | ||||
| launch_params.free(); | launch_params.free(); | ||||
| denoiser_state.free(); | denoiser_state.free(); | ||||
| // Unload modules | // Unload modules | ||||
| if (optix_module != NULL) | if (optix_module != NULL) | ||||
| optixModuleDestroy(optix_module); | optixModuleDestroy(optix_module); | ||||
| for (unsigned int i = 0; i < 2; ++i) | |||||
| if (builtin_modules[i] != NULL) | |||||
| optixModuleDestroy(builtin_modules[i]); | |||||
| for (unsigned int i = 0; i < NUM_PIPELINES; ++i) | for (unsigned int i = 0; i < NUM_PIPELINES; ++i) | ||||
| if (pipelines[i] != NULL) | if (pipelines[i] != NULL) | ||||
| optixPipelineDestroy(pipelines[i]); | optixPipelineDestroy(pipelines[i]); | ||||
| // Destroy launch streams | // Destroy launch streams | ||||
| for (CUstream stream : cuda_stream) | for (CUstream stream : cuda_stream) | ||||
| cuStreamDestroy(stream); | cuStreamDestroy(stream); | ||||
| ▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | bool load_kernels(const DeviceRequestedFeatures &requested_features) override | ||||
| const CUDAContextScope scope(cuContext); | const CUDAContextScope scope(cuContext); | ||||
| // Unload existing OptiX module and pipelines first | // Unload existing OptiX module and pipelines first | ||||
| if (optix_module != NULL) { | if (optix_module != NULL) { | ||||
| optixModuleDestroy(optix_module); | optixModuleDestroy(optix_module); | ||||
| optix_module = NULL; | optix_module = NULL; | ||||
| } | } | ||||
| for (unsigned int i = 0; i < 2; ++i) { | |||||
| if (builtin_modules[i] != NULL) { | |||||
| optixModuleDestroy(builtin_modules[i]); | |||||
| builtin_modules[i] = NULL; | |||||
| } | |||||
| } | |||||
| for (unsigned int i = 0; i < NUM_PIPELINES; ++i) { | for (unsigned int i = 0; i < NUM_PIPELINES; ++i) { | ||||
| if (pipelines[i] != NULL) { | if (pipelines[i] != NULL) { | ||||
| optixPipelineDestroy(pipelines[i]); | optixPipelineDestroy(pipelines[i]); | ||||
| pipelines[i] = NULL; | pipelines[i] = NULL; | ||||
| } | } | ||||
| } | } | ||||
| OptixModuleCompileOptions module_options; | OptixModuleCompileOptions module_options; | ||||
| Show All 15 Lines | |||||
| # ifdef WITH_CYCLES_DEBUG | # ifdef WITH_CYCLES_DEBUG | ||||
| pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW | | pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW | | ||||
| OPTIX_EXCEPTION_FLAG_TRACE_DEPTH; | OPTIX_EXCEPTION_FLAG_TRACE_DEPTH; | ||||
| # else | # else | ||||
| pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; | pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; | ||||
| # endif | # endif | ||||
| pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h | pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; | |||||
| if (requested_features.use_hair) { | |||||
| if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { | |||||
| pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE; | |||||
| } | |||||
| else { | |||||
| pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; | |||||
| } | |||||
| } | |||||
| # endif | |||||
| // Keep track of whether motion blur is enabled, so as to enable/disable motion in BVH builds | // Keep track of whether motion blur is enabled, so as to enable/disable motion in BVH builds | ||||
| // This is necessary since objects may be reported to have motion if the Vector pass is | // This is necessary since objects may be reported to have motion if the Vector pass is | ||||
| // active, but may still need to be rendered without motion blur if that isn't active as well | // active, but may still need to be rendered without motion blur if that isn't active as well | ||||
| motion_blur = requested_features.use_object_motion; | motion_blur = requested_features.use_object_motion; | ||||
| if (motion_blur) { | if (motion_blur) { | ||||
| pipeline_options.usesMotionBlur = true; | pipeline_options.usesMotionBlur = true; | ||||
| // Motion blur can insert motion transforms into the traversal graph | // Motion blur can insert motion transforms into the traversal graph | ||||
| ▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | if (requested_features.use_hair) { | ||||
| // Ideally this should not be needed. | // Ideally this should not be needed. | ||||
| group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_all"; | group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_all"; | ||||
| group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_all"; | group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_all"; | ||||
| } | } | ||||
| else { | else { | ||||
| group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | ||||
| group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | ||||
| } | } | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { | |||||
| OptixBuiltinISOptions builtin_options; | |||||
| builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; | |||||
| builtin_options.usesMotionBlur = false; | |||||
| check_result_optix_ret(optixBuiltinISModuleGet( | |||||
| context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0])); | |||||
| group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0]; | |||||
| group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr; | |||||
| group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0]; | |||||
| group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr; | |||||
| if (motion_blur) { | |||||
| builtin_options.usesMotionBlur = true; | |||||
| check_result_optix_ret(optixBuiltinISModuleGet( | |||||
| context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1])); | |||||
| group_descs[PG_HITD_MOTION] = group_descs[PG_HITD]; | |||||
| group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1]; | |||||
| group_descs[PG_HITS_MOTION] = group_descs[PG_HITS]; | |||||
| group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1]; | |||||
| } | |||||
| } | |||||
| # endif | |||||
| } | } | ||||
| if (requested_features.use_subsurface || requested_features.use_shader_raytrace) { | if (requested_features.use_subsurface || requested_features.use_shader_raytrace) { | ||||
| // Add hit group for local intersections | // Add hit group for local intersections | ||||
| group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | ||||
| group_descs[PG_HITL].hitgroup.moduleAH = optix_module; | group_descs[PG_HITL].hitgroup.moduleAH = optix_module; | ||||
| group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit"; | group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit"; | ||||
| } | } | ||||
| Show All 35 Lines | for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { | ||||
| check_result_optix_ret(optixProgramGroupGetStackSize(groups[i], &stack_size[i])); | check_result_optix_ret(optixProgramGroupGetStackSize(groups[i], &stack_size[i])); | ||||
| } | } | ||||
| sbt_data.copy_to_device(); // Upload SBT to device | sbt_data.copy_to_device(); // Upload SBT to device | ||||
| // Calculate maximum trace continuation stack size | // Calculate maximum trace continuation stack size | ||||
| unsigned int trace_css = stack_size[PG_HITD].cssCH; | unsigned int trace_css = stack_size[PG_HITD].cssCH; | ||||
| // This is based on the maximum of closest-hit and any-hit/intersection programs | // This is based on the maximum of closest-hit and any-hit/intersection programs | ||||
| trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); | trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); | ||||
| trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); | |||||
| trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); | trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); | ||||
| trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); | |||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| trace_css = std::max(trace_css, | |||||
| stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH); | |||||
| trace_css = std::max(trace_css, | |||||
| stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); | |||||
| # endif | |||||
| OptixPipelineLinkOptions link_options; | OptixPipelineLinkOptions link_options; | ||||
| link_options.maxTraceDepth = 1; | link_options.maxTraceDepth = 1; | ||||
| # ifdef WITH_CYCLES_DEBUG | # ifdef WITH_CYCLES_DEBUG | ||||
| link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; | link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; | ||||
| # else | # else | ||||
| link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; | link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; | ||||
| # endif | # endif | ||||
| link_options.overrideUsesMotionBlur = pipeline_options.usesMotionBlur; | # if OPTIX_ABI_VERSION < 24 | ||||
| link_options.overrideUsesMotionBlur = motion_blur; | |||||
| # endif | |||||
| { // Create path tracing pipeline | { // Create path tracing pipeline | ||||
| OptixProgramGroup pipeline_groups[] = { | OptixProgramGroup pipeline_groups[] = { | ||||
| groups[PG_RGEN], | groups[PG_RGEN], | ||||
| groups[PG_MISS], | groups[PG_MISS], | ||||
| groups[PG_HITD], | groups[PG_HITD], | ||||
| groups[PG_HITS], | groups[PG_HITS], | ||||
| groups[PG_HITL], | groups[PG_HITL], | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| groups[PG_HITD_MOTION], | |||||
| groups[PG_HITS_MOTION], | |||||
| # endif | |||||
| # ifdef WITH_CYCLES_DEBUG | # ifdef WITH_CYCLES_DEBUG | ||||
| groups[PG_EXCP], | groups[PG_EXCP], | ||||
| # endif | # endif | ||||
| }; | }; | ||||
| check_result_optix_ret( | check_result_optix_ret( | ||||
| optixPipelineCreate(context, | optixPipelineCreate(context, | ||||
| &pipeline_options, | &pipeline_options, | ||||
| &link_options, | &link_options, | ||||
| pipeline_groups, | pipeline_groups, | ||||
| (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])), | (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])), | ||||
| nullptr, | nullptr, | ||||
| 0, | 0, | ||||
| &pipelines[PIP_PATH_TRACE])); | &pipelines[PIP_PATH_TRACE])); | ||||
| // Combine ray generation and trace continuation stack size | // Combine ray generation and trace continuation stack size | ||||
| const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css; | const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css; | ||||
| // Set stack size depending on pipeline options | // Set stack size depending on pipeline options | ||||
| check_result_optix_ret(optixPipelineSetStackSize( | check_result_optix_ret( | ||||
| pipelines[PIP_PATH_TRACE], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2))); | optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2))); | ||||
| } | } | ||||
| // Only need to create shader evaluation pipeline if one of these features is used: | // Only need to create shader evaluation pipeline if one of these features is used: | ||||
| const bool use_shader_eval_pipeline = requested_features.use_baking || | const bool use_shader_eval_pipeline = requested_features.use_baking || | ||||
| requested_features.use_background_light || | requested_features.use_background_light || | ||||
| requested_features.use_true_displacement; | requested_features.use_true_displacement; | ||||
| if (use_shader_eval_pipeline) { // Create shader evaluation pipeline | if (use_shader_eval_pipeline) { // Create shader evaluation pipeline | ||||
| OptixProgramGroup pipeline_groups[] = { | OptixProgramGroup pipeline_groups[] = { | ||||
| groups[PG_BAKE], | groups[PG_BAKE], | ||||
| groups[PG_DISP], | groups[PG_DISP], | ||||
| groups[PG_BACK], | groups[PG_BACK], | ||||
| groups[PG_MISS], | groups[PG_MISS], | ||||
| groups[PG_HITD], | groups[PG_HITD], | ||||
| groups[PG_HITS], | groups[PG_HITS], | ||||
| groups[PG_HITL], | groups[PG_HITL], | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| groups[PG_HITD_MOTION], | |||||
| groups[PG_HITS_MOTION], | |||||
| # endif | |||||
| # ifdef WITH_CYCLES_DEBUG | # ifdef WITH_CYCLES_DEBUG | ||||
| groups[PG_EXCP], | groups[PG_EXCP], | ||||
| # endif | # endif | ||||
| }; | }; | ||||
| check_result_optix_ret( | check_result_optix_ret( | ||||
| optixPipelineCreate(context, | optixPipelineCreate(context, | ||||
| &pipeline_options, | &pipeline_options, | ||||
| &link_options, | &link_options, | ||||
| pipeline_groups, | pipeline_groups, | ||||
| (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])), | (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])), | ||||
| ▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines | |||||
| # ifdef WITH_CYCLES_DEBUG | # ifdef WITH_CYCLES_DEBUG | ||||
| sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord); | sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord); | ||||
| # endif | # endif | ||||
| sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord); | sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord); | ||||
| sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); | sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); | ||||
| sbt_params.missRecordCount = 1; | sbt_params.missRecordCount = 1; | ||||
| sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); | sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); | ||||
| sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); | sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); | ||||
| sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS | # if OPTIX_ABI_VERSION >= 36 | ||||
| sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL | |||||
| # else | |||||
| sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL | |||||
| # endif | |||||
| // Launch the ray generation program | // Launch the ray generation program | ||||
| check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE], | check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE], | ||||
| cuda_stream[thread_index], | cuda_stream[thread_index], | ||||
| launch_params_ptr, | launch_params_ptr, | ||||
| launch_params.data_elements, | launch_params.data_elements, | ||||
| &sbt_params, | &sbt_params, | ||||
| // Launch with samples close to each other for better locality | // Launch with samples close to each other for better locality | ||||
| ▲ Show 20 Lines • Show All 147 Lines • ▼ Show 20 Lines | # endif | ||||
| optixDenoiserDestroy(denoiser); | optixDenoiserDestroy(denoiser); | ||||
| } | } | ||||
| // Create OptiX denoiser handle on demand when it is first used | // Create OptiX denoiser handle on demand when it is first used | ||||
| OptixDenoiserOptions denoiser_options; | OptixDenoiserOptions denoiser_options; | ||||
| assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3); | assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3); | ||||
| denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>( | denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>( | ||||
| OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1)); | OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1)); | ||||
| # if OPTIX_ABI_VERSION < 28 | |||||
| denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3; | denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3; | ||||
| # endif | |||||
| check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); | check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); | ||||
| check_result_optix_ret( | check_result_optix_ret( | ||||
| optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); | optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); | ||||
| // OptiX denoiser handle was created with the requested number of input passes | // OptiX denoiser handle was created with the requested number of input passes | ||||
| denoiser_input_passes = task.denoising.optix_input_passes; | denoiser_input_passes = task.denoising.optix_input_passes; | ||||
| } | } | ||||
| OptixDenoiserSizes sizes = {}; | OptixDenoiserSizes sizes = {}; | ||||
| check_result_optix_ret( | check_result_optix_ret( | ||||
| optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes)); | optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes)); | ||||
| # if OPTIX_ABI_VERSION < 28 | |||||
| const size_t scratch_size = sizes.recommendedScratchSizeInBytes; | const size_t scratch_size = sizes.recommendedScratchSizeInBytes; | ||||
| # else | |||||
| const size_t scratch_size = sizes.withOverlapScratchSizeInBytes; | |||||
| # endif | |||||
| const size_t scratch_offset = sizes.stateSizeInBytes; | const size_t scratch_offset = sizes.stateSizeInBytes; | ||||
| // Allocate denoiser state if tile size has changed since last setup | // Allocate denoiser state if tile size has changed since last setup | ||||
| if (recreate_denoiser || (denoiser_state.data_width != rect_size.x || | if (recreate_denoiser || (denoiser_state.data_width != rect_size.x || | ||||
| denoiser_state.data_height != rect_size.y)) { | denoiser_state.data_height != rect_size.y)) { | ||||
| denoiser_state.alloc_to_device(scratch_offset + scratch_size); | denoiser_state.alloc_to_device(scratch_offset + scratch_size); | ||||
| // Initialize denoiser state for the current tile size | // Initialize denoiser state for the current tile size | ||||
| ▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines | |||||
| # ifdef WITH_CYCLES_DEBUG | # ifdef WITH_CYCLES_DEBUG | ||||
| sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord); | sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord); | ||||
| # endif | # endif | ||||
| sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord); | sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord); | ||||
| sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); | sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); | ||||
| sbt_params.missRecordCount = 1; | sbt_params.missRecordCount = 1; | ||||
| sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); | sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); | ||||
| sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); | sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); | ||||
| sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS | # if OPTIX_ABI_VERSION >= 36 | ||||
| sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL | |||||
| # else | |||||
| sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL | |||||
| # endif | |||||
| check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL], | check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL], | ||||
| cuda_stream[thread_index], | cuda_stream[thread_index], | ||||
| launch_params_ptr, | launch_params_ptr, | ||||
| launch_params.data_elements, | launch_params.data_elements, | ||||
| &sbt_params, | &sbt_params, | ||||
| task.shader_w, | task.shader_w, | ||||
| 1, | 1, | ||||
| ▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | bool build_optix_bvh(const OptixBuildInput &build_input, | ||||
| compacted_size_prop.result = align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8); | compacted_size_prop.result = align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8); | ||||
| check_result_optix_ret(optixAccelBuild(context, | check_result_optix_ret(optixAccelBuild(context, | ||||
| NULL, | NULL, | ||||
| &options, | &options, | ||||
| &build_input, | &build_input, | ||||
| 1, | 1, | ||||
| temp_mem.device_pointer, | temp_mem.device_pointer, | ||||
| temp_mem.device_size, | sizes.tempSizeInBytes, | ||||
| out_data, | out_data, | ||||
| sizes.outputSizeInBytes, | sizes.outputSizeInBytes, | ||||
| &out_handle, | &out_handle, | ||||
| background ? &compacted_size_prop : NULL, | background ? &compacted_size_prop : NULL, | ||||
| background ? 1 : 0)); | background ? 1 : 0)); | ||||
| // Wait for all operations to finish | // Wait for all operations to finish | ||||
| check_result_cuda_ret(cuStreamSynchronize(NULL)); | check_result_cuda_ret(cuStreamSynchronize(NULL)); | ||||
| ▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | for (Object *ob : bvh->objects) { | ||||
| if (geom->type == Geometry::HAIR) { | if (geom->type == Geometry::HAIR) { | ||||
| // Build BLAS for curve primitives | // Build BLAS for curve primitives | ||||
| Hair *const hair = static_cast<Hair *const>(ob->geometry); | Hair *const hair = static_cast<Hair *const>(ob->geometry); | ||||
| if (hair->num_curves() == 0) { | if (hair->num_curves() == 0) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| const size_t num_curves = hair->num_curves(); | |||||
| const size_t num_segments = hair->num_segments(); | const size_t num_segments = hair->num_segments(); | ||||
| size_t num_motion_steps = 1; | size_t num_motion_steps = 1; | ||||
| Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | ||||
| if (motion_blur && hair->use_motion_blur && motion_keys) { | if (motion_blur && hair->use_motion_blur && motion_keys) { | ||||
| num_motion_steps = hair->motion_steps; | num_motion_steps = hair->motion_steps; | ||||
| } | } | ||||
| device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY); | device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY); | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY); | |||||
| device_vector<float4> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY); | |||||
| // Four control points for each curve segment | |||||
| const size_t num_vertices = num_segments * 4; | |||||
| if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { | |||||
| index_data.alloc(num_segments); | |||||
| vertex_data.alloc(num_vertices * num_motion_steps); | |||||
| } | |||||
| else | |||||
| # endif | |||||
| aabb_data.alloc(num_segments * num_motion_steps); | aabb_data.alloc(num_segments * num_motion_steps); | ||||
| // Get AABBs for each motion step | // Get AABBs for each motion step | ||||
| for (size_t step = 0; step < num_motion_steps; ++step) { | for (size_t step = 0; step < num_motion_steps; ++step) { | ||||
| // The center step for motion vertices is not stored in the attribute | // The center step for motion vertices is not stored in the attribute | ||||
| const float3 *keys = hair->curve_keys.data(); | const float3 *keys = hair->curve_keys.data(); | ||||
| size_t center_step = (num_motion_steps - 1) / 2; | size_t center_step = (num_motion_steps - 1) / 2; | ||||
| if (step != center_step) { | if (step != center_step) { | ||||
| size_t attr_offset = (step > center_step) ? step - 1 : step; | size_t attr_offset = (step > center_step) ? step - 1 : step; | ||||
| // Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4) | // Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4) | ||||
| keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size(); | keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size(); | ||||
| } | } | ||||
| size_t i = step * num_segments; | for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) { | ||||
| for (size_t j = 0; j < num_curves; ++j) { | const Hair::Curve curve = hair->get_curve(j); | ||||
| const Hair::Curve c = hair->get_curve(j); | |||||
| for (size_t k = 0; k < c.num_segments(); ++i, ++k) { | for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) { | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { | |||||
| int k0 = curve.first_key + segment; | |||||
| int k1 = k0 + 1; | |||||
| int ka = max(k0 - 1, curve.first_key); | |||||
| int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1); | |||||
| const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); | |||||
| const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); | |||||
| const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); | |||||
| const float4 pw = make_float4(hair->curve_radius[ka], | |||||
| hair->curve_radius[k0], | |||||
| hair->curve_radius[k1], | |||||
| hair->curve_radius[kb]); | |||||
| // Convert Catmull-Rom data to cubic B-spline control points | |||||
| static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; | |||||
| static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; | |||||
| static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; | |||||
| static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f; | |||||
| index_data[i] = i * 4; | |||||
| float4 *const v = vertex_data.data() + step * num_vertices + index_data[i]; | |||||
| v[0] = make_float4( | |||||
| dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw)); | |||||
| v[1] = make_float4( | |||||
| dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw)); | |||||
| v[2] = make_float4( | |||||
| dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw)); | |||||
| v[3] = make_float4( | |||||
| dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw)); | |||||
| } | |||||
| else | |||||
| # endif | |||||
| { | |||||
| BoundBox bounds = BoundBox::empty; | BoundBox bounds = BoundBox::empty; | ||||
| c.bounds_grow(k, keys, hair->curve_radius.data(), bounds); | curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds); | ||||
| aabb_data[i].minX = bounds.min.x; | const size_t index = step * num_segments + i; | ||||
| aabb_data[i].minY = bounds.min.y; | aabb_data[index].minX = bounds.min.x; | ||||
| aabb_data[i].minZ = bounds.min.z; | aabb_data[index].minY = bounds.min.y; | ||||
| aabb_data[i].maxX = bounds.max.x; | aabb_data[index].minZ = bounds.min.z; | ||||
| aabb_data[i].maxY = bounds.max.y; | aabb_data[index].maxX = bounds.max.x; | ||||
| aabb_data[i].maxZ = bounds.max.z; | aabb_data[index].maxY = bounds.max.y; | ||||
| aabb_data[index].maxZ = bounds.max.z; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| // Upload AABB data to GPU | // Upload AABB data to GPU | ||||
| aabb_data.copy_to_device(); | aabb_data.copy_to_device(); | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| index_data.copy_to_device(); | |||||
| vertex_data.copy_to_device(); | |||||
| # endif | |||||
| vector<device_ptr> aabb_ptrs; | vector<device_ptr> aabb_ptrs; | ||||
| aabb_ptrs.reserve(num_motion_steps); | aabb_ptrs.reserve(num_motion_steps); | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| vector<device_ptr> width_ptrs; | |||||
| vector<device_ptr> vertex_ptrs; | |||||
| width_ptrs.reserve(num_motion_steps); | |||||
| vertex_ptrs.reserve(num_motion_steps); | |||||
| # endif | |||||
| for (size_t step = 0; step < num_motion_steps; ++step) { | for (size_t step = 0; step < num_motion_steps; ++step) { | ||||
| aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb)); | aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb)); | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| const device_ptr base_ptr = vertex_data.device_pointer + | |||||
| step * num_vertices * sizeof(float4); | |||||
| width_ptrs.push_back(base_ptr + 3 * sizeof(float)); // Offset by vertex size | |||||
| vertex_ptrs.push_back(base_ptr); | |||||
| # endif | |||||
| } | } | ||||
| // Disable visibility test anyhit program, since it is already checked during intersection | // Force a single any-hit call, so shadow record-all behavior works correctly | ||||
| // Those trace calls that require anyhit can force it with OPTIX_RAY_FLAG_ENFORCE_ANYHIT | unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL; | ||||
| unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; | |||||
| OptixBuildInput build_input = {}; | OptixBuildInput build_input = {}; | ||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { | |||||
| build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES; | |||||
| build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; | |||||
| build_input.curveArray.numPrimitives = num_segments; | |||||
| build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); | |||||
| build_input.curveArray.numVertices = num_vertices; | |||||
| build_input.curveArray.vertexStrideInBytes = sizeof(float4); | |||||
| build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data(); | |||||
| build_input.curveArray.widthStrideInBytes = sizeof(float4); | |||||
| build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer; | |||||
| build_input.curveArray.indexStrideInBytes = sizeof(int); | |||||
| build_input.curveArray.flag = build_flags; | |||||
| build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset; | |||||
| } | |||||
| else | |||||
| # endif | |||||
| { | |||||
| // Disable visibility test any-hit program, since it is already checked during | |||||
| // intersection. Those trace calls that require anyhit can force it with a ray flag. | |||||
| build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; | |||||
| build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; | build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; | ||||
| # if OPTIX_ABI_VERSION < 23 | |||||
| build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); | build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); | ||||
| build_input.aabbArray.numPrimitives = num_segments; | build_input.aabbArray.numPrimitives = num_segments; | ||||
| build_input.aabbArray.strideInBytes = sizeof(OptixAabb); | build_input.aabbArray.strideInBytes = sizeof(OptixAabb); | ||||
| build_input.aabbArray.flags = &build_flags; | build_input.aabbArray.flags = &build_flags; | ||||
| build_input.aabbArray.numSbtRecords = 1; | build_input.aabbArray.numSbtRecords = 1; | ||||
| build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset; | build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset; | ||||
| # else | |||||
| build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); | |||||
| build_input.customPrimitiveArray.numPrimitives = num_segments; | |||||
| build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb); | |||||
| build_input.customPrimitiveArray.flags = &build_flags; | |||||
| build_input.customPrimitiveArray.numSbtRecords = 1; | |||||
| build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset; | |||||
| # endif | |||||
| } | |||||
| // Allocate memory for new BLAS and build it | // Allocate memory for new BLAS and build it | ||||
| OptixTraversableHandle handle; | OptixTraversableHandle handle; | ||||
| if (build_optix_bvh(build_input, num_motion_steps, handle)) { | if (build_optix_bvh(build_input, num_motion_steps, handle)) { | ||||
| geometry.insert({ob->geometry, handle}); | geometry.insert({ob->geometry, handle}); | ||||
| } | } | ||||
| else { | else { | ||||
| return false; | return false; | ||||
| Show All 38 Lines | # endif | ||||
| vertex_data.copy_to_device(); | vertex_data.copy_to_device(); | ||||
| vector<device_ptr> vertex_ptrs; | vector<device_ptr> vertex_ptrs; | ||||
| vertex_ptrs.reserve(num_motion_steps); | vertex_ptrs.reserve(num_motion_steps); | ||||
| for (size_t step = 0; step < num_motion_steps; ++step) { | for (size_t step = 0; step < num_motion_steps; ++step) { | ||||
| vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3)); | vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3)); | ||||
| } | } | ||||
| // No special build flags for triangle primitives | // Force a single any-hit call, so shadow record-all behavior works correctly | ||||
| unsigned int build_flags = OPTIX_GEOMETRY_FLAG_NONE; | unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL; | ||||
| OptixBuildInput build_input = {}; | OptixBuildInput build_input = {}; | ||||
| build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES; | build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES; | ||||
| build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); | build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); | ||||
| build_input.triangleArray.numVertices = num_verts; | build_input.triangleArray.numVertices = num_verts; | ||||
| build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; | build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; | ||||
| build_input.triangleArray.vertexStrideInBytes = sizeof(float3); | build_input.triangleArray.vertexStrideInBytes = sizeof(float3); | ||||
| build_input.triangleArray.indexBuffer = index_data.device_pointer; | build_input.triangleArray.indexBuffer = index_data.device_pointer; | ||||
| build_input.triangleArray.numIndexTriplets = mesh->num_triangles(); | build_input.triangleArray.numIndexTriplets = mesh->num_triangles(); | ||||
| ▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | for (Object *ob : bvh->objects) { | ||||
| // Clear transform to identity matrix | // Clear transform to identity matrix | ||||
| instance.transform[0] = 1.0f; | instance.transform[0] = 1.0f; | ||||
| instance.transform[5] = 1.0f; | instance.transform[5] = 1.0f; | ||||
| instance.transform[10] = 1.0f; | instance.transform[10] = 1.0f; | ||||
| // Set user instance ID to object index | // Set user instance ID to object index | ||||
| instance.instanceId = ob->get_device_index(); | instance.instanceId = ob->get_device_index(); | ||||
| // Have to have at least one bit in the mask, or else instance would always be culled | |||||
| instance.visibilityMask = 1; | |||||
| if (ob->geometry->has_volume) { | |||||
| // Volumes have a special bit set in the visibility mask so a trace can mask only volumes | // Volumes have a special bit set in the visibility mask so a trace can mask only volumes | ||||
| // See 'scene_intersect_volume' in bvh.h | instance.visibilityMask |= 2; | ||||
| instance.visibilityMask = (ob->geometry->has_volume ? 3 : 1); | } | ||||
| if (ob->geometry->type == Geometry::HAIR) { | |||||
| // Same applies to curves (so they can be skipped in local trace calls) | |||||
| instance.visibilityMask |= 4; | |||||
| # if OPTIX_ABI_VERSION >= 36 | |||||
| if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api && | |||||
| static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) { | |||||
| // Select between motion blur and non-motion blur built-in intersection module | |||||
| instance.sbtOffset = PG_HITD_MOTION - PG_HITD; | |||||
| } | |||||
| # endif | |||||
| } | |||||
| // Insert motion traversable if object has motion | // Insert motion traversable if object has motion | ||||
| if (motion_blur && ob->use_motion()) { | if (motion_blur && ob->use_motion()) { | ||||
| size_t motion_keys = max(ob->motion.size(), 2) - 2; | size_t motion_keys = max(ob->motion.size(), 2) - 2; | ||||
| size_t motion_transform_size = sizeof(OptixSRTMotionTransform) + | size_t motion_transform_size = sizeof(OptixSRTMotionTransform) + | ||||
| motion_keys * sizeof(OptixSRTData); | motion_keys * sizeof(OptixSRTData); | ||||
| const CUDAContextScope scope(cuContext); | const CUDAContextScope scope(cuContext); | ||||
| ▲ Show 20 Lines • Show All 240 Lines • Show Last 20 Lines | |||||