Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/optix/device_impl.cpp
| Show First 20 Lines • Show All 306 Lines • ▼ Show 20 Lines | OptiXDevice::~OptiXDevice() | ||||
| sbt_data.free(); | sbt_data.free(); | ||||
| texture_info.free(); | texture_info.free(); | ||||
| launch_params.free(); | launch_params.free(); | ||||
| /* Unload modules. */ | /* Unload modules. */ | ||||
| if (optix_module != NULL) { | if (optix_module != NULL) { | ||||
| optixModuleDestroy(optix_module); | optixModuleDestroy(optix_module); | ||||
| } | } | ||||
| for (unsigned int i = 0; i < 2; ++i) { | for (int i = 0; i < 2; ++i) { | ||||
| if (builtin_modules[i] != NULL) { | if (builtin_modules[i] != NULL) { | ||||
| optixModuleDestroy(builtin_modules[i]); | optixModuleDestroy(builtin_modules[i]); | ||||
| } | } | ||||
| } | } | ||||
| for (unsigned int i = 0; i < NUM_PIPELINES; ++i) { | for (int i = 0; i < NUM_PIPELINES; ++i) { | ||||
| if (pipelines[i] != NULL) { | if (pipelines[i] != NULL) { | ||||
| optixPipelineDestroy(pipelines[i]); | optixPipelineDestroy(pipelines[i]); | ||||
| } | } | ||||
| } | } | ||||
| for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { | |||||
| if (groups[i] != NULL) { | |||||
| optixProgramGroupDestroy(groups[i]); | |||||
| } | |||||
| } | |||||
| # ifdef WITH_OSL | |||||
| for (const OptixModule &module : osl_modules) { | |||||
| if (module != NULL) { | |||||
| optixModuleDestroy(module); | |||||
| } | |||||
| } | |||||
| for (const OptixProgramGroup &group : osl_groups) { | |||||
| if (group != NULL) { | |||||
| optixProgramGroupDestroy(group); | |||||
| } | |||||
| } | |||||
| # endif | |||||
| /* Make sure denoiser is destroyed before device context! */ | /* Make sure denoiser is destroyed before device context! */ | ||||
| if (denoiser_.optix_denoiser != nullptr) { | if (denoiser_.optix_denoiser != nullptr) { | ||||
| optixDenoiserDestroy(denoiser_.optix_denoiser); | optixDenoiserDestroy(denoiser_.optix_denoiser); | ||||
| } | } | ||||
| optixDeviceContextDestroy(context); | optixDeviceContextDestroy(context); | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | |||||
| bool OptiXDevice::load_kernels(const uint kernel_features) | bool OptiXDevice::load_kernels(const uint kernel_features) | ||||
| { | { | ||||
| if (have_error()) { | if (have_error()) { | ||||
| /* Abort early if context creation failed already. */ | /* Abort early if context creation failed already. */ | ||||
| return false; | return false; | ||||
| } | } | ||||
| # ifdef WITH_OSL | |||||
| const bool use_osl = (kernel_features & KERNEL_FEATURE_OSL); | |||||
| # else | |||||
| const bool use_osl = false; | |||||
| # endif | |||||
| /* Skip creating OptiX module if only doing denoising. */ | /* Skip creating OptiX module if only doing denoising. */ | ||||
| const bool need_optix_kernels = (kernel_features & | const bool need_optix_kernels = (kernel_features & | ||||
| (KERNEL_FEATURE_PATH_TRACING | KERNEL_FEATURE_BAKING)); | (KERNEL_FEATURE_PATH_TRACING | KERNEL_FEATURE_BAKING)); | ||||
| /* Detect existence of OptiX kernel and SDK here early. So we can error out | /* Detect existence of OptiX kernel and SDK here early. So we can error out | ||||
| * before compiling the CUDA kernels, to avoid failing right after when | * before compiling the CUDA kernels, to avoid failing right after when | ||||
| * compiling the OptiX kernel. */ | * compiling the OptiX kernel. */ | ||||
| string suffix = use_osl ? "_osl" : | |||||
| (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ? | |||||
| "_shader_raytrace" : | |||||
| ""; | |||||
| string ptx_filename; | string ptx_filename; | ||||
| if (need_optix_kernels) { | if (need_optix_kernels) { | ||||
| ptx_filename = path_get( | ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx"); | ||||
| (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ? | |||||
| "lib/kernel_optix_shader_raytrace.ptx" : | |||||
| "lib/kernel_optix.ptx"); | |||||
| if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { | if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { | ||||
| std::string optix_include_dir = get_optix_include_dir(); | std::string optix_include_dir = get_optix_include_dir(); | ||||
| if (optix_include_dir.empty()) { | if (optix_include_dir.empty()) { | ||||
| set_error( | set_error( | ||||
| "Unable to compile OptiX kernels at runtime. Set OPTIX_ROOT_DIR environment variable " | "Unable to compile OptiX kernels at runtime. Set OPTIX_ROOT_DIR environment variable " | ||||
| "to a directory containing the OptiX SDK."); | "to a directory containing the OptiX SDK."); | ||||
| return false; | return false; | ||||
| } | } | ||||
| Show All 19 Lines | # endif | ||||
| const CUDAContextScope scope(this); | const CUDAContextScope scope(this); | ||||
| /* Unload existing OptiX module and pipelines first. */ | /* Unload existing OptiX module and pipelines first. */ | ||||
| if (optix_module != NULL) { | if (optix_module != NULL) { | ||||
| optixModuleDestroy(optix_module); | optixModuleDestroy(optix_module); | ||||
| optix_module = NULL; | optix_module = NULL; | ||||
| } | } | ||||
| for (unsigned int i = 0; i < 2; ++i) { | for (int i = 0; i < 2; ++i) { | ||||
| if (builtin_modules[i] != NULL) { | if (builtin_modules[i] != NULL) { | ||||
| optixModuleDestroy(builtin_modules[i]); | optixModuleDestroy(builtin_modules[i]); | ||||
| builtin_modules[i] = NULL; | builtin_modules[i] = NULL; | ||||
| } | } | ||||
| } | } | ||||
| for (unsigned int i = 0; i < NUM_PIPELINES; ++i) { | for (int i = 0; i < NUM_PIPELINES; ++i) { | ||||
| if (pipelines[i] != NULL) { | if (pipelines[i] != NULL) { | ||||
| optixPipelineDestroy(pipelines[i]); | optixPipelineDestroy(pipelines[i]); | ||||
| pipelines[i] = NULL; | pipelines[i] = NULL; | ||||
| } | } | ||||
| } | } | ||||
| for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { | |||||
| if (groups[i] != NULL) { | |||||
| optixProgramGroupDestroy(groups[i]); | |||||
| groups[i] = NULL; | |||||
| } | |||||
| } | |||||
| # ifdef WITH_OSL | |||||
| /* Recreating base OptiX module invalidates all OSL modules too, since they link against it. */ | |||||
| for (const OptixModule &module : osl_modules) { | |||||
| if (module != NULL) { | |||||
| optixModuleDestroy(module); | |||||
| } | |||||
| } | |||||
| osl_modules.clear(); | |||||
| for (const OptixProgramGroup &group : osl_groups) { | |||||
| if (group != NULL) { | |||||
| optixProgramGroupDestroy(group); | |||||
| } | |||||
| } | |||||
| osl_groups.clear(); | |||||
| # endif | |||||
| OptixModuleCompileOptions module_options = {}; | OptixModuleCompileOptions module_options = {}; | ||||
| module_options.maxRegisterCount = 0; /* Do not set an explicit register limit. */ | module_options.maxRegisterCount = 0; /* Do not set an explicit register limit. */ | ||||
| if (DebugFlags().optix.use_debug) { | if (DebugFlags().optix.use_debug) { | ||||
| module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; | module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; | ||||
| module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; | module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; | ||||
| } | } | ||||
| else { | else { | ||||
| module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; | module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; | ||||
| module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; | module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; | ||||
| } | } | ||||
| module_options.boundValues = nullptr; | module_options.boundValues = nullptr; | ||||
| module_options.numBoundValues = 0; | module_options.numBoundValues = 0; | ||||
| # if OPTIX_ABI_VERSION >= 55 | # if OPTIX_ABI_VERSION >= 55 | ||||
| module_options.payloadTypes = nullptr; | module_options.payloadTypes = nullptr; | ||||
| module_options.numPayloadTypes = 0; | module_options.numPayloadTypes = 0; | ||||
| # endif | # endif | ||||
| OptixPipelineCompileOptions pipeline_options = {}; | |||||
| /* Default to no motion blur and two-level graph, since it is the fastest option. */ | /* Default to no motion blur and two-level graph, since it is the fastest option. */ | ||||
| pipeline_options.usesMotionBlur = false; | pipeline_options.usesMotionBlur = false; | ||||
| pipeline_options.traversableGraphFlags = | pipeline_options.traversableGraphFlags = | ||||
| OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING; | OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING; | ||||
| pipeline_options.numPayloadValues = 8; | pipeline_options.numPayloadValues = 8; | ||||
| pipeline_options.numAttributeValues = 2; /* u, v */ | pipeline_options.numAttributeValues = 2; /* u, v */ | ||||
| pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; | pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; | ||||
| pipeline_options.pipelineLaunchParamsVariableName = "kernel_params"; /* See globals.h */ | pipeline_options.pipelineLaunchParamsVariableName = "kernel_params"; /* See globals.h */ | ||||
| Show All 12 Lines | # endif | ||||
| } | } | ||||
| if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | ||||
| pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; | pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; | ||||
| } | } | ||||
| /* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds | /* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds | ||||
| * This is necessary since objects may be reported to have motion if the Vector pass is | * This is necessary since objects may be reported to have motion if the Vector pass is | ||||
| * active, but may still need to be rendered without motion blur if that isn't active as well. */ | * active, but may still need to be rendered without motion blur if that isn't active as well. */ | ||||
| motion_blur = (kernel_features & KERNEL_FEATURE_OBJECT_MOTION) != 0; | if (kernel_features & KERNEL_FEATURE_OBJECT_MOTION) { | ||||
| if (motion_blur) { | |||||
| pipeline_options.usesMotionBlur = true; | pipeline_options.usesMotionBlur = true; | ||||
| /* Motion blur can insert motion transforms into the traversal graph. | /* Motion blur can insert motion transforms into the traversal graph. | ||||
| * It is no longer a two-level graph then, so need to set flags to allow any configuration. */ | * It is no longer a two-level graph then, so need to set flags to allow any configuration. */ | ||||
| pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY; | pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY; | ||||
| } | } | ||||
| { /* Load and compile PTX module with OptiX kernels. */ | { /* Load and compile PTX module with OptiX kernels. */ | ||||
| string ptx_data; | string ptx_data; | ||||
| if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { | if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { | ||||
| string cflags = compile_kernel_get_common_cflags(kernel_features); | string cflags = compile_kernel_get_common_cflags(kernel_features); | ||||
| ptx_filename = compile_kernel( | ptx_filename = compile_kernel(cflags, ("kernel" + suffix).c_str(), "optix", true); | ||||
| cflags, | |||||
| (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ? | |||||
| "kernel_shader_raytrace" : | |||||
| "kernel", | |||||
| "optix", | |||||
| true); | |||||
| } | } | ||||
| if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) { | if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) { | ||||
| set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str())); | set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str())); | ||||
| return false; | return false; | ||||
| } | } | ||||
| # if OPTIX_ABI_VERSION >= 55 | # if OPTIX_ABI_VERSION >= 55 | ||||
| OptixTask task = nullptr; | OptixTask task = nullptr; | ||||
| Show All 25 Lines | if (result != OPTIX_SUCCESS) { | ||||
| set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)", | set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)", | ||||
| ptx_filename.c_str(), | ptx_filename.c_str(), | ||||
| optixGetErrorName(result))); | optixGetErrorName(result))); | ||||
| return false; | return false; | ||||
| } | } | ||||
| } | } | ||||
| /* Create program groups. */ | /* Create program groups. */ | ||||
| OptixProgramGroup groups[NUM_PROGRAM_GROUPS] = {}; | |||||
| OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {}; | OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {}; | ||||
| OptixProgramGroupOptions group_options = {}; /* There are no options currently. */ | OptixProgramGroupOptions group_options = {}; /* There are no options currently. */ | ||||
| group_descs[PG_RGEN_INTERSECT_CLOSEST].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | group_descs[PG_RGEN_INTERSECT_CLOSEST].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | ||||
| group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.module = optix_module; | group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.module = optix_module; | ||||
| group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.entryFunctionName = | group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.entryFunctionName = | ||||
| "__raygen__kernel_optix_integrator_intersect_closest"; | "__raygen__kernel_optix_integrator_intersect_closest"; | ||||
| group_descs[PG_RGEN_INTERSECT_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | group_descs[PG_RGEN_INTERSECT_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | ||||
| group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.module = optix_module; | group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.module = optix_module; | ||||
| ▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | # endif | ||||
| optix_assert(optixBuiltinISModuleGet( | optix_assert(optixBuiltinISModuleGet( | ||||
| context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0])); | context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0])); | ||||
| group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0]; | group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0]; | ||||
| group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr; | group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr; | ||||
| group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0]; | group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0]; | ||||
| group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr; | group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr; | ||||
| if (motion_blur) { | if (pipeline_options.usesMotionBlur) { | ||||
| builtin_options.usesMotionBlur = true; | builtin_options.usesMotionBlur = true; | ||||
| optix_assert(optixBuiltinISModuleGet( | optix_assert(optixBuiltinISModuleGet( | ||||
| context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1])); | context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1])); | ||||
| group_descs[PG_HITD_MOTION] = group_descs[PG_HITD]; | group_descs[PG_HITD_MOTION] = group_descs[PG_HITD]; | ||||
| group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1]; | group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1]; | ||||
| group_descs[PG_HITS_MOTION] = group_descs[PG_HITS]; | group_descs[PG_HITS_MOTION] = group_descs[PG_HITS]; | ||||
| group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1]; | group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1]; | ||||
| } | } | ||||
| } | } | ||||
| else { | else { | ||||
| /* Custom ribbon intersection. */ | /* Custom ribbon intersection. */ | ||||
| group_descs[PG_HITD].hitgroup.moduleIS = optix_module; | group_descs[PG_HITD].hitgroup.moduleIS = optix_module; | ||||
| group_descs[PG_HITS].hitgroup.moduleIS = optix_module; | group_descs[PG_HITS].hitgroup.moduleIS = optix_module; | ||||
| group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | ||||
| group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; | ||||
| } | } | ||||
| } | } | ||||
| /* Pointclouds */ | |||||
| if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | ||||
| group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD]; | group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD]; | ||||
| group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | ||||
| group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module; | group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module; | ||||
| group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point"; | group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point"; | ||||
| group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS]; | group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS]; | ||||
| group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | ||||
| group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module; | group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module; | ||||
| group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point"; | group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point"; | ||||
| } | } | ||||
| if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) { | |||||
| /* Add hit group for local intersections. */ | /* Add hit group for local intersections. */ | ||||
| if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) { | |||||
| group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; | ||||
| group_descs[PG_HITL].hitgroup.moduleAH = optix_module; | group_descs[PG_HITL].hitgroup.moduleAH = optix_module; | ||||
| group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit"; | group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit"; | ||||
| } | } | ||||
| /* Shader raytracing replaces some functions with direct callables. */ | /* Shader raytracing replaces some functions with direct callables. */ | ||||
| if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { | if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { | ||||
| group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | ||||
| group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module; | group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module; | ||||
| group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName = | group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName = | ||||
| "__raygen__kernel_optix_integrator_shade_surface_raytrace"; | "__raygen__kernel_optix_integrator_shade_surface_raytrace"; | ||||
| /* Kernels with OSL support are built without SVM, so can skip those direct callables there. */ | |||||
| if (!use_osl) { | |||||
| group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; | group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; | ||||
| group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module; | group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module; | ||||
| group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC = "__direct_callable__svm_node_ao"; | group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC = "__direct_callable__svm_node_ao"; | ||||
| group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; | group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; | ||||
| group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module; | group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module; | ||||
| group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC = | group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC = | ||||
| "__direct_callable__svm_node_bevel"; | "__direct_callable__svm_node_bevel"; | ||||
| } | } | ||||
| } | |||||
| /* MNEE. */ | |||||
| if (kernel_features & KERNEL_FEATURE_MNEE) { | if (kernel_features & KERNEL_FEATURE_MNEE) { | ||||
| group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | ||||
| group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module; | group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module; | ||||
| group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName = | group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName = | ||||
| "__raygen__kernel_optix_integrator_shade_surface_mnee"; | "__raygen__kernel_optix_integrator_shade_surface_mnee"; | ||||
| } | } | ||||
| /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */ | |||||
| if (use_osl) { | |||||
| group_descs[PG_RGEN_SHADE_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_integrator_shade_background"; | |||||
| group_descs[PG_RGEN_SHADE_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_SHADE_LIGHT].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_SHADE_LIGHT].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_integrator_shade_light"; | |||||
| group_descs[PG_RGEN_SHADE_SURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_SHADE_SURFACE].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_SHADE_SURFACE].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_integrator_shade_surface"; | |||||
| group_descs[PG_RGEN_SHADE_VOLUME].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_SHADE_VOLUME].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_SHADE_VOLUME].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_integrator_shade_volume"; | |||||
| group_descs[PG_RGEN_SHADE_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_SHADE_SHADOW].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_SHADE_SHADOW].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_integrator_shade_shadow"; | |||||
| group_descs[PG_RGEN_EVAL_DISPLACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_EVAL_DISPLACE].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_EVAL_DISPLACE].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_shader_eval_displace"; | |||||
| group_descs[PG_RGEN_EVAL_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_shader_eval_background"; | |||||
| group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; | |||||
| group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.module = optix_module; | |||||
| group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.entryFunctionName = | |||||
| "__raygen__kernel_optix_shader_eval_curve_shadow_transparency"; | |||||
| } | |||||
| optix_assert(optixProgramGroupCreate( | optix_assert(optixProgramGroupCreate( | ||||
| context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups)); | context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups)); | ||||
| /* Get program stack sizes. */ | /* Get program stack sizes. */ | ||||
| OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {}; | OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {}; | ||||
| /* Set up SBT, which in this case is used only to select between different programs. */ | /* Set up SBT, which in this case is used only to select between different programs. */ | ||||
| sbt_data.alloc(NUM_PROGRAM_GROUPS); | sbt_data.alloc(NUM_PROGRAM_GROUPS); | ||||
| memset(sbt_data.host_pointer, 0, sizeof(SbtRecord) * NUM_PROGRAM_GROUPS); | memset(sbt_data.host_pointer, 0, sizeof(SbtRecord) * NUM_PROGRAM_GROUPS); | ||||
| for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { | for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { | ||||
| optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i])); | optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i])); | ||||
| optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i])); | optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i])); | ||||
| } | } | ||||
| sbt_data.copy_to_device(); /* Upload SBT to device. */ | sbt_data.copy_to_device(); /* Upload SBT to device. */ | ||||
| /* Calculate maximum trace continuation stack size. */ | /* Calculate maximum trace continuation stack size. */ | ||||
| unsigned int trace_css = stack_size[PG_HITD].cssCH; | unsigned int trace_css = stack_size[PG_HITD].cssCH; | ||||
| /* This is based on the maximum of closest-hit and any-hit/intersection programs. */ | /* This is based on the maximum of closest-hit and any-hit/intersection programs. */ | ||||
| trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); | trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); | ||||
| trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); | trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); | ||||
| trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); | trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); | ||||
| trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH); | trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH); | ||||
| trace_css = std::max(trace_css, | trace_css = std::max(trace_css, | ||||
| stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH); | stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH); | ||||
| trace_css = std::max(trace_css, | trace_css = std::max(trace_css, | ||||
| stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); | stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); | ||||
| trace_css = std::max( | trace_css = std::max( | ||||
| trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH); | trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH); | ||||
| trace_css = std::max( | trace_css = std::max( | ||||
| trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH); | trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH); | ||||
| OptixPipelineLinkOptions link_options = {}; | OptixPipelineLinkOptions link_options = {}; | ||||
| link_options.maxTraceDepth = 1; | link_options.maxTraceDepth = 1; | ||||
| link_options.debugLevel = module_options.debugLevel; | |||||
| if (DebugFlags().optix.use_debug) { | if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE) && !use_osl) { | ||||
| link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; | /* Create shader raytracing and MNEE pipeline. */ | ||||
| } | |||||
| else { | |||||
| link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; | |||||
| } | |||||
| if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { | |||||
| /* Create shader raytracing pipeline. */ | |||||
| vector<OptixProgramGroup> pipeline_groups; | vector<OptixProgramGroup> pipeline_groups; | ||||
| pipeline_groups.reserve(NUM_PROGRAM_GROUPS); | pipeline_groups.reserve(NUM_PROGRAM_GROUPS); | ||||
| if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { | |||||
| pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]); | ||||
| pipeline_groups.push_back(groups[PG_CALL_SVM_AO]); | |||||
| pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]); | |||||
| } | |||||
| if (kernel_features & KERNEL_FEATURE_MNEE) { | |||||
| pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]); | |||||
| } | |||||
| pipeline_groups.push_back(groups[PG_MISS]); | pipeline_groups.push_back(groups[PG_MISS]); | ||||
| pipeline_groups.push_back(groups[PG_HITD]); | pipeline_groups.push_back(groups[PG_HITD]); | ||||
| pipeline_groups.push_back(groups[PG_HITS]); | pipeline_groups.push_back(groups[PG_HITS]); | ||||
| pipeline_groups.push_back(groups[PG_HITL]); | pipeline_groups.push_back(groups[PG_HITL]); | ||||
| pipeline_groups.push_back(groups[PG_HITV]); | pipeline_groups.push_back(groups[PG_HITV]); | ||||
| if (motion_blur) { | if (pipeline_options.usesMotionBlur) { | ||||
| pipeline_groups.push_back(groups[PG_HITD_MOTION]); | pipeline_groups.push_back(groups[PG_HITD_MOTION]); | ||||
| pipeline_groups.push_back(groups[PG_HITS_MOTION]); | pipeline_groups.push_back(groups[PG_HITS_MOTION]); | ||||
| } | } | ||||
| if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | ||||
| pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); | pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); | ||||
| pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); | pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); | ||||
| } | } | ||||
| pipeline_groups.push_back(groups[PG_CALL_SVM_AO]); | |||||
| pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]); | |||||
| optix_assert(optixPipelineCreate(context, | optix_assert(optixPipelineCreate(context, | ||||
| &pipeline_options, | &pipeline_options, | ||||
| &link_options, | &link_options, | ||||
| pipeline_groups.data(), | pipeline_groups.data(), | ||||
| pipeline_groups.size(), | pipeline_groups.size(), | ||||
| nullptr, | nullptr, | ||||
| 0, | 0, | ||||
| &pipelines[PIP_SHADE_RAYTRACE])); | &pipelines[PIP_SHADE])); | ||||
| /* Combine ray generation and trace continuation stack size. */ | /* Combine ray generation and trace continuation stack size. */ | ||||
| const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG + | const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG, | ||||
| stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG) + | |||||
| link_options.maxTraceDepth * trace_css; | link_options.maxTraceDepth * trace_css; | ||||
| const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC, | const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC, | ||||
| stack_size[PG_CALL_SVM_BEVEL].dssDC); | stack_size[PG_CALL_SVM_BEVEL].dssDC); | ||||
| /* Set stack size depending on pipeline options. */ | /* Set stack size depending on pipeline options. */ | ||||
| optix_assert(optixPipelineSetStackSize( | optix_assert(optixPipelineSetStackSize( | ||||
| pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2)); | pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2)); | ||||
| } | } | ||||
| if (kernel_features & KERNEL_FEATURE_MNEE) { | { /* Create intersection-only pipeline. */ | ||||
| /* Create MNEE pipeline. */ | |||||
| vector<OptixProgramGroup> pipeline_groups; | vector<OptixProgramGroup> pipeline_groups; | ||||
| pipeline_groups.reserve(NUM_PROGRAM_GROUPS); | pipeline_groups.reserve(NUM_PROGRAM_GROUPS); | ||||
| pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]); | pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]); | ||||
| pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]); | |||||
| pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]); | |||||
| pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]); | |||||
| pipeline_groups.push_back(groups[PG_MISS]); | pipeline_groups.push_back(groups[PG_MISS]); | ||||
| pipeline_groups.push_back(groups[PG_HITD]); | pipeline_groups.push_back(groups[PG_HITD]); | ||||
| pipeline_groups.push_back(groups[PG_HITS]); | pipeline_groups.push_back(groups[PG_HITS]); | ||||
| pipeline_groups.push_back(groups[PG_HITL]); | pipeline_groups.push_back(groups[PG_HITL]); | ||||
| pipeline_groups.push_back(groups[PG_HITV]); | pipeline_groups.push_back(groups[PG_HITV]); | ||||
| if (motion_blur) { | if (pipeline_options.usesMotionBlur) { | ||||
| pipeline_groups.push_back(groups[PG_HITD_MOTION]); | pipeline_groups.push_back(groups[PG_HITD_MOTION]); | ||||
| pipeline_groups.push_back(groups[PG_HITS_MOTION]); | pipeline_groups.push_back(groups[PG_HITS_MOTION]); | ||||
| } | } | ||||
| if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | ||||
| pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); | pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); | ||||
| pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); | pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); | ||||
| } | } | ||||
| pipeline_groups.push_back(groups[PG_CALL_SVM_AO]); | |||||
| pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]); | |||||
| optix_assert(optixPipelineCreate(context, | optix_assert(optixPipelineCreate(context, | ||||
| &pipeline_options, | &pipeline_options, | ||||
| &link_options, | &link_options, | ||||
| pipeline_groups.data(), | pipeline_groups.data(), | ||||
| pipeline_groups.size(), | pipeline_groups.size(), | ||||
| nullptr, | nullptr, | ||||
| 0, | 0, | ||||
| &pipelines[PIP_SHADE_MNEE])); | &pipelines[PIP_INTERSECT])); | ||||
| /* Combine ray generation and trace continuation stack size. */ | /* Calculate continuation stack size based on the maximum of all ray generation stack sizes. */ | ||||
| const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG + | const unsigned int css = | ||||
| std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG, | |||||
| std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG, | |||||
| std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG, | |||||
| stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) + | |||||
| link_options.maxTraceDepth * trace_css; | link_options.maxTraceDepth * trace_css; | ||||
| const unsigned int dss = 0; | |||||
| /* Set stack size depending on pipeline options. */ | optix_assert(optixPipelineSetStackSize( | ||||
| optix_assert( | pipelines[PIP_INTERSECT], 0, 0, css, pipeline_options.usesMotionBlur ? 3 : 2)); | ||||
| optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2)); | |||||
| } | } | ||||
| { /* Create intersection-only pipeline. */ | return !have_error(); | ||||
| } | |||||
| bool OptiXDevice::load_osl_kernels() | |||||
| { | |||||
| # ifdef WITH_OSL | |||||
| if (have_error()) { | |||||
| return false; | |||||
| } | |||||
| struct OSLKernel { | |||||
| string ptx; | |||||
| string init_entry; | |||||
| string exec_entry; | |||||
| }; | |||||
| /* This has to be in the same order as the ShaderType enum, so that the index calculation in | |||||
| * osl_eval_nodes checks out */ | |||||
| vector<OSLKernel> osl_kernels; | |||||
| for (ShaderType type = SHADER_TYPE_SURFACE; type <= SHADER_TYPE_BUMP; | |||||
| type = static_cast<ShaderType>(type + 1)) { | |||||
| const vector<OSL::ShaderGroupRef> &groups = (type == SHADER_TYPE_SURFACE ? | |||||
| osl_globals.surface_state : | |||||
| type == SHADER_TYPE_VOLUME ? | |||||
| osl_globals.volume_state : | |||||
| type == SHADER_TYPE_DISPLACEMENT ? | |||||
| osl_globals.displacement_state : | |||||
| osl_globals.bump_state); | |||||
| for (const OSL::ShaderGroupRef &group : groups) { | |||||
| if (group) { | |||||
| string osl_ptx, init_name, entry_name; | |||||
| osl_globals.ss->getattribute(group.get(), "group_init_name", init_name); | |||||
| osl_globals.ss->getattribute(group.get(), "group_entry_name", entry_name); | |||||
| osl_globals.ss->getattribute( | |||||
| group.get(), "ptx_compiled_version", OSL::TypeDesc::PTR, &osl_ptx); | |||||
| int groupdata_size = 0; | |||||
| osl_globals.ss->getattribute(group.get(), "groupdata_size", groupdata_size); | |||||
| if (groupdata_size > 2048) { /* See 'group_data' array in kernel/osl/osl.h */ | |||||
| set_error( | |||||
| string_printf("Requested OSL group data size (%d) is greater than the maximum " | |||||
| "supported with OptiX (2048)", | |||||
| groupdata_size)); | |||||
| return false; | |||||
| } | |||||
| osl_kernels.push_back({std::move(osl_ptx), std::move(init_name), std::move(entry_name)}); | |||||
| } | |||||
| else { | |||||
| /* Add empty entry for non-existent shader groups, so that the index stays stable. */ | |||||
| osl_kernels.emplace_back(); | |||||
| } | |||||
| } | |||||
| } | |||||
| const CUDAContextScope scope(this); | |||||
| if (pipelines[PIP_SHADE]) { | |||||
| optixPipelineDestroy(pipelines[PIP_SHADE]); | |||||
| } | |||||
| for (OptixModule &module : osl_modules) { | |||||
| if (module != NULL) { | |||||
| optixModuleDestroy(module); | |||||
| module = NULL; | |||||
| } | |||||
| } | |||||
| for (OptixProgramGroup &group : osl_groups) { | |||||
| if (group != NULL) { | |||||
| optixProgramGroupDestroy(group); | |||||
| group = NULL; | |||||
| } | |||||
| } | |||||
| OptixProgramGroupOptions group_options = {}; /* There are no options currently. */ | |||||
| OptixModuleCompileOptions module_options = {}; | |||||
| module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; | |||||
| module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; | |||||
| osl_groups.resize(osl_kernels.size() * 2 + 1); | |||||
| osl_modules.resize(osl_kernels.size() + 1); | |||||
| { /* Load and compile PTX module with OSL services. */ | |||||
| string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx"); | |||||
| if (!path_read_text(ptx_filename, ptx_data)) { | |||||
| set_error(string_printf("Failed to load OptiX OSL services kernel from '%s'", | |||||
| ptx_filename.c_str())); | |||||
| return false; | |||||
| } | |||||
| const OptixResult result = optixModuleCreateFromPTX(context, | |||||
| &module_options, | |||||
| &pipeline_options, | |||||
| ptx_data.data(), | |||||
| ptx_data.size(), | |||||
| nullptr, | |||||
| 0, | |||||
| &osl_modules.back()); | |||||
| if (result != OPTIX_SUCCESS) { | |||||
| set_error(string_printf("Failed to load OptiX OSL services kernel from '%s' (%s)", | |||||
| ptx_filename.c_str(), | |||||
| optixGetErrorName(result))); | |||||
| return false; | |||||
| } | |||||
| OptixProgramGroupDesc group_desc = {}; | |||||
| group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; | |||||
| group_desc.callables.entryFunctionNameDC = "__direct_callable__dummy_services"; | |||||
| group_desc.callables.moduleDC = osl_modules.back(); | |||||
| optix_assert(optixProgramGroupCreate( | |||||
| context, &group_desc, 1, &group_options, nullptr, 0, &osl_groups.back())); | |||||
| } | |||||
| TaskPool pool; | |||||
| vector<OptixResult> results(osl_kernels.size(), OPTIX_SUCCESS); | |||||
| for (size_t i = 0; i < osl_kernels.size(); ++i) { | |||||
| if (osl_kernels[i].ptx.empty()) { | |||||
| continue; | |||||
| } | |||||
| # if OPTIX_ABI_VERSION >= 55 | |||||
| OptixTask task = nullptr; | |||||
| results[i] = optixModuleCreateFromPTXWithTasks(context, | |||||
| &module_options, | |||||
| &pipeline_options, | |||||
| osl_kernels[i].ptx.data(), | |||||
| osl_kernels[i].ptx.size(), | |||||
| nullptr, | |||||
| nullptr, | |||||
| &osl_modules[i], | |||||
| &task); | |||||
| if (results[i] == OPTIX_SUCCESS) { | |||||
| execute_optix_task(pool, task, results[i]); | |||||
| } | |||||
| # else | |||||
| pool.push([this, &results, i, &module_options, &osl_kernels]() { | |||||
| results[i] = optixModuleCreateFromPTX(context, | |||||
| &module_options, | |||||
| &pipeline_options, | |||||
| osl_kernels[i].ptx.data(), | |||||
| osl_kernels[i].ptx.size(), | |||||
| nullptr, | |||||
| 0, | |||||
| &osl_modules[i]); | |||||
| }); | |||||
| # endif | |||||
| } | |||||
| pool.wait_work(); | |||||
| for (size_t i = 0; i < osl_kernels.size(); ++i) { | |||||
| if (osl_kernels[i].ptx.empty()) { | |||||
| continue; | |||||
| } | |||||
| if (results[i] != OPTIX_SUCCESS) { | |||||
| set_error(string_printf("Failed to load OptiX OSL kernel for %s (%s)", | |||||
| osl_kernels[i].init_entry.c_str(), | |||||
| optixGetErrorName(results[i]))); | |||||
| return false; | |||||
| } | |||||
| OptixProgramGroupDesc group_descs[2] = {}; | |||||
| group_descs[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; | |||||
| group_descs[0].callables.entryFunctionNameDC = osl_kernels[i].init_entry.c_str(); | |||||
| group_descs[0].callables.moduleDC = osl_modules[i]; | |||||
| group_descs[1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; | |||||
| group_descs[1].callables.entryFunctionNameDC = osl_kernels[i].exec_entry.c_str(); | |||||
| group_descs[1].callables.moduleDC = osl_modules[i]; | |||||
| optix_assert(optixProgramGroupCreate( | |||||
| context, group_descs, 2, &group_options, nullptr, 0, &osl_groups[i * 2])); | |||||
| } | |||||
| vector<OptixStackSizes> osl_stack_size(osl_groups.size()); | |||||
| /* Update SBT with new entries. */ | |||||
| sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size()); | |||||
| for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { | |||||
| optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i])); | |||||
| } | |||||
| for (size_t i = 0; i < osl_groups.size(); ++i) { | |||||
| if (osl_groups[i] != NULL) { | |||||
| optix_assert(optixSbtRecordPackHeader(osl_groups[i], &sbt_data[NUM_PROGRAM_GROUPS + i])); | |||||
| optix_assert(optixProgramGroupGetStackSize(osl_groups[i], &osl_stack_size[i])); | |||||
| } | |||||
| } | |||||
| sbt_data.copy_to_device(); /* Upload updated SBT to device. */ | |||||
| OptixPipelineLinkOptions link_options = {}; | |||||
| link_options.maxTraceDepth = 0; | |||||
| link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; | |||||
| { | |||||
| vector<OptixProgramGroup> pipeline_groups; | vector<OptixProgramGroup> pipeline_groups; | ||||
| pipeline_groups.reserve(NUM_PROGRAM_GROUPS); | pipeline_groups.reserve(NUM_PROGRAM_GROUPS); | ||||
| pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_BACKGROUND]); | ||||
| pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_LIGHT]); | ||||
| pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE]); | ||||
| pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]); | ||||
| pipeline_groups.push_back(groups[PG_MISS]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]); | ||||
| pipeline_groups.push_back(groups[PG_HITD]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_VOLUME]); | ||||
| pipeline_groups.push_back(groups[PG_HITS]); | pipeline_groups.push_back(groups[PG_RGEN_SHADE_SHADOW]); | ||||
| pipeline_groups.push_back(groups[PG_HITL]); | pipeline_groups.push_back(groups[PG_RGEN_EVAL_DISPLACE]); | ||||
| pipeline_groups.push_back(groups[PG_HITV]); | pipeline_groups.push_back(groups[PG_RGEN_EVAL_BACKGROUND]); | ||||
| if (motion_blur) { | pipeline_groups.push_back(groups[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY]); | ||||
| pipeline_groups.push_back(groups[PG_HITD_MOTION]); | |||||
| pipeline_groups.push_back(groups[PG_HITS_MOTION]); | for (const OptixProgramGroup &group : osl_groups) { | ||||
| if (group != NULL) { | |||||
| pipeline_groups.push_back(group); | |||||
| } | } | ||||
| if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { | |||||
| pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); | |||||
| pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); | |||||
| } | } | ||||
| optix_assert(optixPipelineCreate(context, | optix_assert(optixPipelineCreate(context, | ||||
| &pipeline_options, | &pipeline_options, | ||||
| &link_options, | &link_options, | ||||
| pipeline_groups.data(), | pipeline_groups.data(), | ||||
| pipeline_groups.size(), | pipeline_groups.size(), | ||||
| nullptr, | nullptr, | ||||
| 0, | 0, | ||||
| &pipelines[PIP_INTERSECT])); | &pipelines[PIP_SHADE])); | ||||
| /* Calculate continuation stack size based on the maximum of all ray generation stack sizes. */ | unsigned int dss = 0; | ||||
| const unsigned int css = | for (unsigned int i = 0; i < osl_stack_size.size(); ++i) { | ||||
| std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG, | dss = std::max(dss, osl_stack_size[i].dssDC); | ||||
| std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG, | } | ||||
| std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG, | |||||
| stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) + | |||||
| link_options.maxTraceDepth * trace_css; | |||||
| optix_assert( | optix_assert(optixPipelineSetStackSize( | ||||
| optixPipelineSetStackSize(pipelines[PIP_INTERSECT], 0, 0, css, motion_blur ? 3 : 2)); | pipelines[PIP_SHADE], 0, dss, 0, pipeline_options.usesMotionBlur ? 3 : 2)); | ||||
| } | } | ||||
| /* Clean up program group objects. */ | return !have_error(); | ||||
| for (unsigned int i = 0; i < NUM_PROGRAM_GROUPS; ++i) { | # else | ||||
| optixProgramGroupDestroy(groups[i]); | return false; | ||||
| # endif | |||||
| } | } | ||||
| return true; | void *OptiXDevice::get_cpu_osl_memory() | ||||
| { | |||||
| # ifdef WITH_OSL | |||||
| return &osl_globals; | |||||
| # else | |||||
| return NULL; | |||||
| # endif | |||||
| } | } | ||||
| /* -------------------------------------------------------------------- | /* -------------------------------------------------------------------- | ||||
| * Buffer denoising. | * Buffer denoising. | ||||
| */ | */ | ||||
| class OptiXDevice::DenoiseContext { | class OptiXDevice::DenoiseContext { | ||||
| public: | public: | ||||
| ▲ Show 20 Lines • Show All 710 Lines • ▼ Show 20 Lines | if (geom->geometry_type == Geometry::HAIR) { | ||||
| if (hair->num_curves() == 0) { | if (hair->num_curves() == 0) { | ||||
| return; | return; | ||||
| } | } | ||||
| const size_t num_segments = hair->num_segments(); | const size_t num_segments = hair->num_segments(); | ||||
| size_t num_motion_steps = 1; | size_t num_motion_steps = 1; | ||||
| Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | ||||
| if (motion_blur && hair->get_use_motion_blur() && motion_keys) { | if (pipeline_options.usesMotionBlur && hair->get_use_motion_blur() && motion_keys) { | ||||
| num_motion_steps = hair->get_motion_steps(); | num_motion_steps = hair->get_motion_steps(); | ||||
| } | } | ||||
| device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY); | device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY); | ||||
| device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY); | device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY); | ||||
| device_vector<float4> vertex_data(this, "optix temp vertex data", MEM_READ_ONLY); | device_vector<float4> vertex_data(this, "optix temp vertex data", MEM_READ_ONLY); | ||||
| /* Four control points for each curve segment. */ | /* Four control points for each curve segment. */ | ||||
| const size_t num_vertices = num_segments * 4; | const size_t num_vertices = num_segments * 4; | ||||
| ▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | else if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { | ||||
| if (mesh->num_triangles() == 0) { | if (mesh->num_triangles() == 0) { | ||||
| return; | return; | ||||
| } | } | ||||
| const size_t num_verts = mesh->get_verts().size(); | const size_t num_verts = mesh->get_verts().size(); | ||||
| size_t num_motion_steps = 1; | size_t num_motion_steps = 1; | ||||
| Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | ||||
| if (motion_blur && mesh->get_use_motion_blur() && motion_keys) { | if (pipeline_options.usesMotionBlur && mesh->get_use_motion_blur() && motion_keys) { | ||||
| num_motion_steps = mesh->get_motion_steps(); | num_motion_steps = mesh->get_motion_steps(); | ||||
| } | } | ||||
| device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY); | device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY); | ||||
| index_data.alloc(mesh->get_triangles().size()); | index_data.alloc(mesh->get_triangles().size()); | ||||
| memcpy(index_data.data(), | memcpy(index_data.data(), | ||||
| mesh->get_triangles().data(), | mesh->get_triangles().data(), | ||||
| mesh->get_triangles().size() * sizeof(int)); | mesh->get_triangles().size() * sizeof(int)); | ||||
| ▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | else if (geom->geometry_type == Geometry::POINTCLOUD) { | ||||
| PointCloud *const pointcloud = static_cast<PointCloud *const>(geom); | PointCloud *const pointcloud = static_cast<PointCloud *const>(geom); | ||||
| const size_t num_points = pointcloud->num_points(); | const size_t num_points = pointcloud->num_points(); | ||||
| if (num_points == 0) { | if (num_points == 0) { | ||||
| return; | return; | ||||
| } | } | ||||
| size_t num_motion_steps = 1; | size_t num_motion_steps = 1; | ||||
| Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); | ||||
| if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) { | if (pipeline_options.usesMotionBlur && pointcloud->get_use_motion_blur() && motion_points) { | ||||
| num_motion_steps = pointcloud->get_motion_steps(); | num_motion_steps = pointcloud->get_motion_steps(); | ||||
| } | } | ||||
| device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY); | device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY); | ||||
| aabb_data.alloc(num_points * num_motion_steps); | aabb_data.alloc(num_points * num_motion_steps); | ||||
| /* Get AABBs for each motion step. */ | /* Get AABBs for each motion step. */ | ||||
| for (size_t step = 0; step < num_motion_steps; ++step) { | for (size_t step = 0; step < num_motion_steps; ++step) { | ||||
| ▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | else { | ||||
| } | } | ||||
| /* Fill instance descriptions. */ | /* Fill instance descriptions. */ | ||||
| device_vector<OptixInstance> instances(this, "optix tlas instances", MEM_READ_ONLY); | device_vector<OptixInstance> instances(this, "optix tlas instances", MEM_READ_ONLY); | ||||
| instances.alloc(bvh->objects.size()); | instances.alloc(bvh->objects.size()); | ||||
| /* Calculate total motion transform size and allocate memory for them. */ | /* Calculate total motion transform size and allocate memory for them. */ | ||||
| size_t motion_transform_offset = 0; | size_t motion_transform_offset = 0; | ||||
| if (motion_blur) { | if (pipeline_options.usesMotionBlur) { | ||||
| size_t total_motion_transform_size = 0; | size_t total_motion_transform_size = 0; | ||||
| for (Object *const ob : bvh->objects) { | for (Object *const ob : bvh->objects) { | ||||
| if (ob->is_traceable() && ob->use_motion()) { | if (ob->is_traceable() && ob->use_motion()) { | ||||
| total_motion_transform_size = align_up(total_motion_transform_size, | total_motion_transform_size = align_up(total_motion_transform_size, | ||||
| OPTIX_TRANSFORM_BYTE_ALIGNMENT); | OPTIX_TRANSFORM_BYTE_ALIGNMENT); | ||||
| const size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2; | const size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2; | ||||
| total_motion_transform_size = total_motion_transform_size + | total_motion_transform_size = total_motion_transform_size + | ||||
| sizeof(OptixSRTMotionTransform) + | sizeof(OptixSRTMotionTransform) + | ||||
| Show All 34 Lines | for (Object *ob : bvh->objects) { | ||||
| /* Have to have at least one bit in the mask, or else instance would always be culled. */ | /* Have to have at least one bit in the mask, or else instance would always be culled. */ | ||||
| if (0 == instance.visibilityMask) { | if (0 == instance.visibilityMask) { | ||||
| instance.visibilityMask = 0xFF; | instance.visibilityMask = 0xFF; | ||||
| } | } | ||||
| if (ob->get_geometry()->geometry_type == Geometry::HAIR && | if (ob->get_geometry()->geometry_type == Geometry::HAIR && | ||||
| static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) { | static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) { | ||||
| if (motion_blur && ob->get_geometry()->has_motion_blur()) { | if (pipeline_options.usesMotionBlur && ob->get_geometry()->has_motion_blur()) { | ||||
| /* Select between motion blur and non-motion blur built-in intersection module. */ | /* Select between motion blur and non-motion blur built-in intersection module. */ | ||||
| instance.sbtOffset = PG_HITD_MOTION - PG_HITD; | instance.sbtOffset = PG_HITD_MOTION - PG_HITD; | ||||
| } | } | ||||
| } | } | ||||
| else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) { | else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) { | ||||
| /* Use the hit group that has an intersection program for point clouds. */ | /* Use the hit group that has an intersection program for point clouds. */ | ||||
| instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD; | instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD; | ||||
| Show All 11 Lines | # endif | ||||
| * | * | ||||
| * It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit | * It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit | ||||
| * programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT. | * programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT. | ||||
| */ | */ | ||||
| instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT; | instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT; | ||||
| } | } | ||||
| /* Insert motion traversable if object has motion. */ | /* Insert motion traversable if object has motion. */ | ||||
| if (motion_blur && ob->use_motion()) { | if (pipeline_options.usesMotionBlur && ob->use_motion()) { | ||||
| size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2; | size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2; | ||||
| size_t motion_transform_size = sizeof(OptixSRTMotionTransform) + | size_t motion_transform_size = sizeof(OptixSRTMotionTransform) + | ||||
| motion_keys * sizeof(OptixSRTData); | motion_keys * sizeof(OptixSRTData); | ||||
| const CUDAContextScope scope(this); | const CUDAContextScope scope(this); | ||||
| motion_transform_offset = align_up(motion_transform_offset, | motion_transform_offset = align_up(motion_transform_offset, | ||||
| OPTIX_TRANSFORM_BYTE_ALIGNMENT); | OPTIX_TRANSFORM_BYTE_ALIGNMENT); | ||||
| ▲ Show 20 Lines • Show All 141 Lines • Show Last 20 Lines | |||||