Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/opencl/opencl_split.cpp
| Show All 35 Lines | texture_slot_t(const string& name, int slot) | ||||
| : name(name), | : name(name), | ||||
| slot(slot) { | slot(slot) { | ||||
| } | } | ||||
| string name; | string name; | ||||
| int slot; | int slot; | ||||
| }; | }; | ||||
| static const string fast_compiled_kernels = | static const string fast_compiled_kernels = | ||||
| "data_init " | |||||
| "path_init " | "path_init " | ||||
| "state_buffer_size " | |||||
| "scene_intersect " | "scene_intersect " | ||||
| "queue_enqueue " | "queue_enqueue " | ||||
| "shader_setup " | "shader_setup " | ||||
| "shader_sort " | "shader_sort " | ||||
| "enqueue_inactive " | "enqueue_inactive " | ||||
| "next_iteration_setup " | "next_iteration_setup " | ||||
| "indirect_subsurface " | "indirect_subsurface " | ||||
| "buffer_update"; | "buffer_update"; | ||||
| const string OpenCLDevice::get_opencl_program_name(bool single_program, const string& kernel_name) | const string OpenCLDevice::get_opencl_program_name(bool single_program, const string& kernel_name) | ||||
| { | { | ||||
| if (single_program) { | if (single_program) { | ||||
| return "split"; | return "split"; | ||||
jbakker: TODO: remove this program. It is bundled in the split_bundle. | |||||
| } | } | ||||
| else { | else { | ||||
| if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { | if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { | ||||
| return "split_bundle"; | return "split_bundle"; | ||||
| } | } | ||||
| else { | else { | ||||
| return "split_" + kernel_name; | return "split_" + kernel_name; | ||||
| } | } | ||||
| Show All 10 Lines | if (fast_compiled_kernels.find(kernel_name) != std::string::npos) { | ||||
| return "kernel_split_bundle.cl"; | return "kernel_split_bundle.cl"; | ||||
| } | } | ||||
| else { | else { | ||||
| return "kernel_" + kernel_name + ".cl"; | return "kernel_" + kernel_name + ".cl"; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features) | string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name) | ||||
| { | { | ||||
| string build_options = "-D__SPLIT_KERNEL__ "; | /* first check for non-split kernel programs */ | ||||
| build_options += requested_features.get_build_options(); | if (opencl_program_name == "base" || opencl_program_name == "denoising") { | ||||
| return ""; | |||||
| } | |||||
| else if (opencl_program_name == "bake") { | |||||
| /* Note: get_build_options for bake is only requested when baking is enabled. | |||||
| displace and background are always requested. | |||||
| `__SPLIT_KERNEL__` must not be present in the compile directives for bake */ | |||||
| DeviceRequestedFeatures features(requested_features); | |||||
| features.use_denoising = false; | |||||
| features.use_object_motion = false; | |||||
| features.use_camera_motion = false; | |||||
| return features.get_build_options(); | |||||
| } | |||||
| else if (opencl_program_name == "displace") { | |||||
| /* As displacement does not use any nodes from the Shading group (eg BSDF). | |||||
| We disable all features that are related to shading. */ | |||||
| DeviceRequestedFeatures features(requested_features); | |||||
| features.use_denoising = false; | |||||
| features.use_object_motion = false; | |||||
| features.use_camera_motion = false; | |||||
| features.use_baking = false; | |||||
| features.use_transparent = false; | |||||
| features.use_shadow_tricks = false; | |||||
| features.use_subsurface = false; | |||||
| features.use_volume = false; | |||||
| features.nodes_features &= ~NODE_FEATURE_VOLUME; | |||||
Done Inline ActionsWhy not &= ~NODE_FEATURE_VOLUME? brecht: Why not `&= ~NODE_FEATURE_VOLUME`? | |||||
| features.use_denoising = false; | |||||
| features.use_principled = false; | |||||
| return features.get_build_options(); | |||||
| } | |||||
| else if (opencl_program_name == "background") { | |||||
| /* Background uses Background shading | |||||
| It is save to disable shadow features, subsurface and volumetric. */ | |||||
| DeviceRequestedFeatures features(requested_features); | |||||
| features.use_baking = false; | |||||
| features.use_transparent = false; | |||||
| features.use_shadow_tricks = false; | |||||
| features.use_denoising = false; | |||||
| /* NOTE: currently possible to use surface nodes like `Hair Info`, `Bump` node. | |||||
| Perhaps we should remove them in UI as it does not make any sense when | |||||
Done Inline ActionsIs this meant to be &= ~NODE_FEATURE_VOLUME? brecht: Is this meant to be `&= ~NODE_FEATURE_VOLUME`? | |||||
Done Inline ActionsMy hypothesis was that background didn't had any surface data so didn't need any surface based features like:
I left NODE_FEATURE_VOLUME for to the point density texture. But even that one needs a surface. Going into more detail it is possible to use NODE_FEATURE_BUMP and NODE_FEATURE_BUMP_STATE and NODE_FEATURE_HAIR info in the background, expect that they won't do much... jbakker: My hypothesis was that background didn't had any surface data so didn't need any surface based… | |||||
| rendering background. */ | |||||
| features.nodes_features &= ~NODE_FEATURE_VOLUME; | |||||
| features.use_subsurface = false; | |||||
| features.use_volume = false; | |||||
| return features.get_build_options(); | |||||
| } | |||||
| string build_options = "-D__SPLIT_KERNEL__ "; | |||||
| DeviceRequestedFeatures nofeatures; | |||||
| /* Set compute device build option. */ | /* Set compute device build option. */ | ||||
| cl_device_type device_type; | cl_device_type device_type; | ||||
| OpenCLInfo::get_device_type(this->cdDevice, &device_type, &this->ciErr); | OpenCLInfo::get_device_type(this->cdDevice, &device_type, &this->ciErr); | ||||
| assert(this->ciErr == CL_SUCCESS); | assert(this->ciErr == CL_SUCCESS); | ||||
| if(device_type == CL_DEVICE_TYPE_GPU) { | if(device_type == CL_DEVICE_TYPE_GPU) { | ||||
| build_options += " -D__COMPUTE_DEVICE_GPU__"; | build_options += "-D__COMPUTE_DEVICE_GPU__ "; | ||||
| } | } | ||||
| return build_options; | /* Add program specific optimized compile directives */ | ||||
| if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) { | |||||
| build_options += nofeatures.get_build_options(); | |||||
| } | |||||
| else if (opencl_program_name == "split_subsurface_scatter" && !requested_features.use_subsurface) { | |||||
| /* When subsurface is off, the kernel updates indexes and does not need any | |||||
| Compile directives */ | |||||
| build_options += nofeatures.get_build_options(); | |||||
| } | } | ||||
| else { | |||||
| DeviceRequestedFeatures features(requested_features); | |||||
| string OpenCLDevice::get_build_options_for_bake(const DeviceRequestedFeatures& requested_features) | /* Always turn off baking at this point. Baking is only usefull when building the bake kernel. | ||||
| { | this also makes sure that the kernels that are build during baking can be reused | ||||
| return requested_features.get_build_options(); | when not doing any baking. */ | ||||
| features.use_baking = false; | |||||
| /* Do not vary on shaders when program doesn't do any shading. | |||||
| We have bundled them in a single program. */ | |||||
| if (opencl_program_name == "split_bundle") { | |||||
| features.max_nodes_group = 0; | |||||
| features.nodes_features = 0; | |||||
| } | |||||
| /* No specific settings, just add the regular ones */ | |||||
| build_options += features.get_build_options(); | |||||
Done Inline ActionsTODO: only check on split_bundle jbakker: TODO: only check on split_bundle | |||||
| } | |||||
| return build_options; | |||||
| } | } | ||||
| namespace { | namespace { | ||||
| /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to | /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to | ||||
| * fetch its size. | * fetch its size. | ||||
| */ | */ | ||||
| typedef struct KernelGlobalsDummy { | typedef struct KernelGlobalsDummy { | ||||
| ▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | public: | ||||
| } | } | ||||
| virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, | virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name, | ||||
| const DeviceRequestedFeatures& requested_features) | const DeviceRequestedFeatures& requested_features) | ||||
| { | { | ||||
| OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory); | OpenCLSplitKernelFunction* kernel = new OpenCLSplitKernelFunction(device, cached_memory); | ||||
| bool single_program = OpenCLInfo::use_single_program(); | bool single_program = OpenCLInfo::use_single_program(); | ||||
| const string program_name = device->get_opencl_program_name(single_program, kernel_name); | |||||
| kernel->program = | kernel->program = | ||||
| OpenCLDevice::OpenCLProgram(device, | OpenCLDevice::OpenCLProgram(device, | ||||
| device->get_opencl_program_name(single_program, kernel_name), | program_name, | ||||
| device->get_opencl_program_filename(single_program, kernel_name), | device->get_opencl_program_filename(single_program, kernel_name), | ||||
| device->get_build_options(requested_features)); | device->get_build_options(requested_features, program_name)); | ||||
| kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); | kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); | ||||
| kernel->program.load(); | kernel->program.load(); | ||||
| if(!kernel->program.is_loaded()) { | if(!kernel->program.is_loaded()) { | ||||
| delete kernel; | delete kernel; | ||||
| return NULL; | return NULL; | ||||
| } | } | ||||
| return kernel; | return kernel; | ||||
| } | } | ||||
| virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) | virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) | ||||
| { | { | ||||
| device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); | device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); | ||||
| size_buffer.alloc(1); | size_buffer.alloc(1); | ||||
| size_buffer.zero_to_device(); | size_buffer.zero_to_device(); | ||||
| uint threads = num_threads; | uint threads = num_threads; | ||||
| device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); | cl_kernel kernel_state_buffer_size = device->program_split(ustring("path_trace_state_buffer_size")); | ||||
| device->kernel_set_args(kernel_state_buffer_size, 0, kg, data, threads, size_buffer); | |||||
| size_t global_size = 64; | size_t global_size = 64; | ||||
| device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, | device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, | ||||
| device->program_state_buffer_size(), | kernel_state_buffer_size, | ||||
| 1, | 1, | ||||
| NULL, | NULL, | ||||
| &global_size, | &global_size, | ||||
| NULL, | NULL, | ||||
| 0, | 0, | ||||
| NULL, | NULL, | ||||
| NULL); | NULL); | ||||
| Show All 28 Lines | virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, | ||||
| cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; | cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; | ||||
| /* Set the range of samples to be processed for every ray in | /* Set the range of samples to be processed for every ray in | ||||
| * path-regeneration logic. | * path-regeneration logic. | ||||
| */ | */ | ||||
| cl_int start_sample = rtile.start_sample; | cl_int start_sample = rtile.start_sample; | ||||
| cl_int end_sample = rtile.start_sample + rtile.num_samples; | cl_int end_sample = rtile.start_sample + rtile.num_samples; | ||||
| cl_kernel kernel_data_init = device->program_split(ustring("path_trace_data_init")); | |||||
| cl_uint start_arg_index = | cl_uint start_arg_index = | ||||
| device->kernel_set_args(device->program_data_init(), | device->kernel_set_args(kernel_data_init, | ||||
| 0, | 0, | ||||
| kernel_globals, | kernel_globals, | ||||
| kernel_data, | kernel_data, | ||||
| split_data, | split_data, | ||||
| num_global_elements, | num_global_elements, | ||||
| ray_state); | ray_state); | ||||
| device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index); | device->set_kernel_arg_buffers(kernel_data_init, &start_arg_index); | ||||
| start_arg_index += | start_arg_index += | ||||
| device->kernel_set_args(device->program_data_init(), | device->kernel_set_args(kernel_data_init, | ||||
| start_arg_index, | start_arg_index, | ||||
| start_sample, | start_sample, | ||||
| end_sample, | end_sample, | ||||
| rtile.x, | rtile.x, | ||||
| rtile.y, | rtile.y, | ||||
| rtile.w, | rtile.w, | ||||
| rtile.h, | rtile.h, | ||||
| rtile.offset, | rtile.offset, | ||||
| rtile.stride, | rtile.stride, | ||||
| queue_index, | queue_index, | ||||
| dQueue_size, | dQueue_size, | ||||
| use_queues_flag, | use_queues_flag, | ||||
| work_pool_wgs, | work_pool_wgs, | ||||
| rtile.num_samples, | rtile.num_samples, | ||||
| rtile.buffer); | rtile.buffer); | ||||
| /* Enqueue ckPathTraceKernel_data_init kernel. */ | /* Enqueue ckPathTraceKernel_data_init kernel. */ | ||||
| device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, | device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, | ||||
| device->program_data_init(), | kernel_data_init, | ||||
| 2, | 2, | ||||
| NULL, | NULL, | ||||
| dim.global_size, | dim.global_size, | ||||
| dim.local_size, | dim.local_size, | ||||
| 0, | 0, | ||||
| NULL, | NULL, | ||||
| NULL); | NULL); | ||||
| ▲ Show 20 Lines • Show All 176 Lines • ▼ Show 20 Lines | OpenCLDevice::~OpenCLDevice() | ||||
| for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { | for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) { | ||||
| delete mt->second; | delete mt->second; | ||||
| } | } | ||||
| base_program.release(); | base_program.release(); | ||||
| bake_program.release(); | bake_program.release(); | ||||
| displace_program.release(); | displace_program.release(); | ||||
| background_program.release(); | background_program.release(); | ||||
| program_split.release(); | |||||
| program_data_init.release(); | |||||
| if(cqCommandQueue) | if(cqCommandQueue) | ||||
| clReleaseCommandQueue(cqCommandQueue); | clReleaseCommandQueue(cqCommandQueue); | ||||
| if(cxContext) | if(cxContext) | ||||
| clReleaseContext(cxContext); | clReleaseContext(cxContext); | ||||
| delete split_kernel; | delete split_kernel; | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | if(!device_initialized) { | ||||
| fprintf(stderr, "OpenCL: failed to initialize device.\n"); | fprintf(stderr, "OpenCL: failed to initialize device.\n"); | ||||
| return false; | return false; | ||||
| } | } | ||||
| /* Verify we have right opencl version. */ | /* Verify we have right opencl version. */ | ||||
| if(!opencl_version_check()) | if(!opencl_version_check()) | ||||
| return false; | return false; | ||||
| base_program = OpenCLProgram(this, "base", "kernel_base.cl", ""); | vector<OpenCLProgram*> programs; | ||||
| base_program.add_kernel(ustring("convert_to_byte")); | displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", get_build_options(requested_features, "displace")); | ||||
| base_program.add_kernel(ustring("convert_to_half_float")); | |||||
| base_program.add_kernel(ustring("zero_buffer")); | |||||
| bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", get_build_options_for_bake(requested_features)); | |||||
| bake_program.add_kernel(ustring("bake")); | |||||
| displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", get_build_options_for_bake(requested_features)); | |||||
| displace_program.add_kernel(ustring("displace")); | displace_program.add_kernel(ustring("displace")); | ||||
| programs.push_back(&displace_program); | |||||
| background_program = OpenCLProgram(this, "background", "kernel_background.cl", get_build_options_for_bake(requested_features)); | background_program = OpenCLProgram(this, "background", "kernel_background.cl", get_build_options(requested_features, "background")); | ||||
| background_program.add_kernel(ustring("background")); | background_program.add_kernel(ustring("background")); | ||||
| denoising_program = OpenCLProgram(this, "denoising", "filter.cl", ""); | |||||
| denoising_program.add_kernel(ustring("filter_divide_shadow")); | |||||
| denoising_program.add_kernel(ustring("filter_get_feature")); | |||||
| denoising_program.add_kernel(ustring("filter_write_feature")); | |||||
| denoising_program.add_kernel(ustring("filter_detect_outliers")); | |||||
| denoising_program.add_kernel(ustring("filter_combine_halves")); | |||||
| denoising_program.add_kernel(ustring("filter_construct_transform")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_calc_difference")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_blur")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_calc_weight")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_update_output")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_normalize")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_construct_gramian")); | |||||
| denoising_program.add_kernel(ustring("filter_finalize")); | |||||
| vector<OpenCLProgram*> programs; | |||||
| programs.push_back(&bake_program); | |||||
| programs.push_back(&displace_program); | |||||
| programs.push_back(&background_program); | programs.push_back(&background_program); | ||||
| bool single_program = OpenCLInfo::use_single_program(); | bool single_program = OpenCLInfo::use_single_program(); | ||||
| program_data_init = OpenCLDevice::OpenCLProgram( | |||||
| this, | |||||
| get_opencl_program_name(single_program, "data_init"), | |||||
| get_opencl_program_filename(single_program, "data_init"), | |||||
| get_build_options(requested_features)); | |||||
| program_data_init.add_kernel(ustring("path_trace_data_init")); | |||||
| programs.push_back(&program_data_init); | |||||
| program_state_buffer_size = OpenCLDevice::OpenCLProgram( | |||||
| this, | |||||
| get_opencl_program_name(single_program, "state_buffer_size"), | |||||
| get_opencl_program_filename(single_program, "state_buffer_size"), | |||||
| get_build_options(requested_features)); | |||||
| program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size")); | |||||
| programs.push_back(&program_state_buffer_size); | |||||
| #define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name)); | #define ADD_SPLIT_KERNEL_SINGLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name)); | ||||
| #define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \ | #define ADD_SPLIT_KERNEL_SPLIT_PROGRAM(kernel_name) \ | ||||
| const string program_name_##kernel_name = "split_"#kernel_name; \ | |||||
| program_##kernel_name = \ | program_##kernel_name = \ | ||||
| OpenCLDevice::OpenCLProgram(this, \ | OpenCLDevice::OpenCLProgram(this, \ | ||||
| "split_"#kernel_name, \ | program_name_##kernel_name, \ | ||||
| "kernel_"#kernel_name".cl", \ | "kernel_"#kernel_name".cl", \ | ||||
| get_build_options(requested_features)); \ | get_build_options(requested_features, program_name_##kernel_name)); \ | ||||
| program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ | program_##kernel_name.add_kernel(ustring("path_trace_"#kernel_name)); \ | ||||
| programs.push_back(&program_##kernel_name); | programs.push_back(&program_##kernel_name); | ||||
| if (single_program) { | if (single_program) { | ||||
| program_split = OpenCLDevice::OpenCLProgram(this, | program_split = OpenCLDevice::OpenCLProgram(this, | ||||
| "split" , | "split" , | ||||
| "kernel_split.cl", | "kernel_split.cl", | ||||
| get_build_options(requested_features)); | get_build_options(requested_features, "split")); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(state_buffer_size); | |||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(data_init); | |||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(lamp_emission); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(do_volume); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_background); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_eval); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(holdout_emission_blurring_pathtermination_ao); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(subsurface_scatter); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(direct_lighting); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_ao); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shadow_blocked_dl); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); | ||||
| programs.push_back(&program_split); | programs.push_back(&program_split); | ||||
| } | } | ||||
| else { | else { | ||||
| /* Ordered with most complex kernels first, to reduce overall compile time. */ | /* Ordered with most complex kernels first, to reduce overall compile time. */ | ||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(subsurface_scatter); | ||||
| if (requested_features.use_volume) { | |||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(do_volume); | ||||
| } | |||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_dl); | ||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shadow_blocked_ao); | ||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(holdout_emission_blurring_pathtermination_ao); | ||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(lamp_emission); | ||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(direct_lighting); | ||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(indirect_background); | ||||
| ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval); | ADD_SPLIT_KERNEL_SPLIT_PROGRAM(shader_eval); | ||||
| /* Quick kernels bundled in a single program to reduce overhead of starting | /* Quick kernels bundled in a single program to reduce overhead of starting | ||||
| * Blender processes. */ | * Blender processes. */ | ||||
| program_split = OpenCLDevice::OpenCLProgram(this, | program_split = OpenCLDevice::OpenCLProgram(this, | ||||
| "split_bundle" , | "split_bundle" , | ||||
| "kernel_split_bundle.cl", | "kernel_split_bundle.cl", | ||||
| get_build_options(requested_features)); | get_build_options(requested_features, "split_bundle")); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(data_init); | |||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(state_buffer_size); | |||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(path_init); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(scene_intersect); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(queue_enqueue); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_setup); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(shader_sort); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(enqueue_inactive); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(next_iteration_setup); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(indirect_subsurface); | ||||
| ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); | ADD_SPLIT_KERNEL_SINGLE_PROGRAM(buffer_update); | ||||
| programs.push_back(&program_split); | programs.push_back(&program_split); | ||||
| } | } | ||||
| #undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM | #undef ADD_SPLIT_KERNEL_SPLIT_PROGRAM | ||||
| #undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM | #undef ADD_SPLIT_KERNEL_SINGLE_PROGRAM | ||||
| base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base")); | |||||
| base_program.add_kernel(ustring("convert_to_byte")); | |||||
| base_program.add_kernel(ustring("convert_to_half_float")); | |||||
| base_program.add_kernel(ustring("zero_buffer")); | |||||
| programs.push_back(&base_program); | programs.push_back(&base_program); | ||||
| if (requested_features.use_baking) { | |||||
| bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", get_build_options(requested_features, "bake")); | |||||
| bake_program.add_kernel(ustring("bake")); | |||||
| programs.push_back(&bake_program); | |||||
| } | |||||
| denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising")); | |||||
| denoising_program.add_kernel(ustring("filter_divide_shadow")); | |||||
| denoising_program.add_kernel(ustring("filter_get_feature")); | |||||
| denoising_program.add_kernel(ustring("filter_write_feature")); | |||||
| denoising_program.add_kernel(ustring("filter_detect_outliers")); | |||||
| denoising_program.add_kernel(ustring("filter_combine_halves")); | |||||
| denoising_program.add_kernel(ustring("filter_construct_transform")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_calc_difference")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_blur")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_calc_weight")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_update_output")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_normalize")); | |||||
| denoising_program.add_kernel(ustring("filter_nlm_construct_gramian")); | |||||
| denoising_program.add_kernel(ustring("filter_finalize")); | |||||
| programs.push_back(&denoising_program); | programs.push_back(&denoising_program); | ||||
| /* Parallel compilation of Cycles kernels, this launches multiple | /* Parallel compilation of Cycles kernels, this launches multiple | ||||
| * processes to workaround OpenCL frameworks serializing the calls | * processes to workaround OpenCL frameworks serializing the calls | ||||
| * internally within a single process. */ | * internally within a single process. */ | ||||
| TaskPool task_pool; | TaskPool task_pool; | ||||
| foreach(OpenCLProgram *program, programs) { | foreach(OpenCLProgram *program, programs) { | ||||
| task_pool.push(function_bind(&OpenCLProgram::load, program)); | task_pool.push(function_bind(&OpenCLProgram::load, program)); | ||||
| ▲ Show 20 Lines • Show All 1,008 Lines • ▼ Show 20 Lines | build_options += string_printf("-D__COMPUTE_CAPABILITY__=%u ", | ||||
| compute_capability_major * 100 + | compute_capability_major * 100 + | ||||
| compute_capability_minor * 10); | compute_capability_minor * 10); | ||||
| } | } | ||||
| else if(platform_name == "Apple") | else if(platform_name == "Apple") | ||||
| build_options += "-D__KERNEL_OPENCL_APPLE__ "; | build_options += "-D__KERNEL_OPENCL_APPLE__ "; | ||||
| else if(platform_name == "AMD Accelerated Parallel Processing") | else if(platform_name == "AMD Accelerated Parallel Processing") | ||||
| build_options += "-D__KERNEL_OPENCL_AMD__ "; | build_options += "-D__KERNEL_OPENCL_AMD__ "; | ||||
Done Inline ActionsAdd space back jbakker: Add space back | |||||
| else if(platform_name == "Intel(R) OpenCL") { | else if(platform_name == "Intel(R) OpenCL") { | ||||
| build_options += "-D__KERNEL_OPENCL_INTEL_CPU__ "; | build_options += "-D__KERNEL_OPENCL_INTEL_CPU__ "; | ||||
| /* Options for gdb source level kernel debugging. | /* Options for gdb source level kernel debugging. | ||||
| * this segfaults on linux currently. | * this segfaults on linux currently. | ||||
| */ | */ | ||||
| if(OpenCLInfo::use_debug() && debug_src) | if(OpenCLInfo::use_debug() && debug_src) | ||||
| ▲ Show 20 Lines • Show All 159 Lines • Show Last 20 Lines | |||||
TODO: remove this program. It is bundled in the split_bundle.