Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/opencl/opencl_split.cpp
| Show First 20 Lines • Show All 1,346 Lines • ▼ Show 20 Lines | while (task->acquire_tile(this, tile)) { | ||||
| * release_tile waits (stalling other devices from entering | * release_tile waits (stalling other devices from entering | ||||
| * release_tile) for all kernels to complete. If device1 (a | * release_tile) for all kernels to complete. If device1 (a | ||||
| * slow-render device) reaches release_tile first then it would | * slow-render device) reaches release_tile first then it would | ||||
| * stall device2 (a fast-render device) from proceeding to render | * stall device2 (a fast-render device) from proceeding to render | ||||
| * next tile. | * next tile. | ||||
| */ | */ | ||||
| clFinish(cqCommandQueue); | clFinish(cqCommandQueue); | ||||
| } | } | ||||
| else if (tile.task == RenderTile::BAKE) { | |||||
| bake(*task, tile); | |||||
| } | |||||
| else if (tile.task == RenderTile::DENOISE) { | else if (tile.task == RenderTile::DENOISE) { | ||||
| tile.sample = tile.start_sample + tile.num_samples; | tile.sample = tile.start_sample + tile.num_samples; | ||||
| denoise(tile, denoising); | denoise(tile, denoising); | ||||
| task->update_progress(&tile, tile.w * tile.h); | task->update_progress(&tile, tile.w * tile.h); | ||||
| } | } | ||||
| task->release_tile(tile); | task->release_tile(tile); | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 451 Lines • ▼ Show 20 Lines | void OpenCLDevice::shader(DeviceTask &task) | ||||
| cl_mem d_output = CL_MEM_PTR(task.shader_output); | cl_mem d_output = CL_MEM_PTR(task.shader_output); | ||||
| cl_int d_shader_eval_type = task.shader_eval_type; | cl_int d_shader_eval_type = task.shader_eval_type; | ||||
| cl_int d_shader_filter = task.shader_filter; | cl_int d_shader_filter = task.shader_filter; | ||||
| cl_int d_shader_x = task.shader_x; | cl_int d_shader_x = task.shader_x; | ||||
| cl_int d_shader_w = task.shader_w; | cl_int d_shader_w = task.shader_w; | ||||
| cl_int d_offset = task.offset; | cl_int d_offset = task.offset; | ||||
| OpenCLDevice::OpenCLProgram *program = &background_program; | OpenCLDevice::OpenCLProgram *program = &background_program; | ||||
| if (task.shader_eval_type >= SHADER_EVAL_BAKE) { | if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { | ||||
| program = &bake_program; | |||||
| } | |||||
| else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { | |||||
| program = &displace_program; | program = &displace_program; | ||||
| } | } | ||||
| program->wait_for_availability(); | program->wait_for_availability(); | ||||
| cl_kernel kernel = (*program)(); | cl_kernel kernel = (*program)(); | ||||
| cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_input, d_output); | cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_input, d_output); | ||||
| set_kernel_arg_buffers(kernel, &start_arg_index); | set_kernel_arg_buffers(kernel, &start_arg_index); | ||||
| Show All 14 Lines | for (int sample = 0; sample < task.num_samples; sample++) { | ||||
| enqueue_kernel(kernel, task.shader_w, 1); | enqueue_kernel(kernel, task.shader_w, 1); | ||||
| clFinish(cqCommandQueue); | clFinish(cqCommandQueue); | ||||
| task.update_progress(NULL); | task.update_progress(NULL); | ||||
| } | } | ||||
| } | } | ||||
| void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile) | |||||
| { | |||||
| scoped_timer timer(&rtile.buffers->render_time); | |||||
| /* Cast arguments to cl types. */ | |||||
| cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); | |||||
| cl_mem d_buffer = CL_MEM_PTR(rtile.buffer); | |||||
| cl_int d_x = rtile.x; | |||||
| cl_int d_y = rtile.y; | |||||
| cl_int d_w = rtile.w; | |||||
| cl_int d_h = rtile.h; | |||||
| cl_int d_offset = rtile.offset; | |||||
| cl_int d_stride = rtile.stride; | |||||
| bake_program.wait_for_availability(); | |||||
| cl_kernel kernel = bake_program(); | |||||
| cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer); | |||||
| set_kernel_arg_buffers(kernel, &start_arg_index); | |||||
| start_arg_index += kernel_set_args( | |||||
| kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride); | |||||
| int start_sample = rtile.start_sample; | |||||
| int end_sample = rtile.start_sample + rtile.num_samples; | |||||
| for (int sample = start_sample; sample < end_sample; sample++) { | |||||
| if (task.get_cancel()) { | |||||
| if (task.need_finish_queue == false) | |||||
| break; | |||||
| } | |||||
| kernel_set_args(kernel, start_arg_index, sample); | |||||
| enqueue_kernel(kernel, d_w, d_h); | |||||
| rtile.sample = sample + 1; | |||||
| task.update_progress(&rtile, rtile.w * rtile.h); | |||||
| } | |||||
| clFinish(cqCommandQueue); | |||||
| } | |||||
| string OpenCLDevice::kernel_build_options(const string *debug_src) | string OpenCLDevice::kernel_build_options(const string *debug_src) | ||||
| { | { | ||||
| string build_options = "-cl-no-signed-zeros -cl-mad-enable "; | string build_options = "-cl-no-signed-zeros -cl-mad-enable "; | ||||
| if (platform_name == "NVIDIA CUDA") { | if (platform_name == "NVIDIA CUDA") { | ||||
| build_options += | build_options += | ||||
| "-D__KERNEL_OPENCL_NVIDIA__ " | "-D__KERNEL_OPENCL_NVIDIA__ " | ||||
| "-cl-nv-maxrregcount=32 " | "-cl-nv-maxrregcount=32 " | ||||
| ▲ Show 20 Lines • Show All 183 Lines • Show Last 20 Lines | |||||