Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/integrator/shader_eval.cpp
- This file was added.
| /* | |||||
| * Copyright 2011-2021 Blender Foundation | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
#include "integrator/shader_eval.h"

#include <atomic>

#include "device/device.h"
#include "device/device_queue.h"
#include "device/cpu/kernel.h"
#include "device/cpu/kernel_thread_globals.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
#include "util/util_tbb.h"
| CCL_NAMESPACE_BEGIN | |||||
| ShaderEval::ShaderEval(Device *device, Progress &progress) : device_(device), progress_(progress) | |||||
| { | |||||
| DCHECK_NE(device_, nullptr); | |||||
| } | |||||
| bool ShaderEval::eval(const ShaderEvalType type, | |||||
| const int max_num_points, | |||||
| const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input, | |||||
| const function<void(device_vector<float4> &)> &read_output) | |||||
| { | |||||
| bool first_device = true; | |||||
| bool success = true; | |||||
| device_->foreach_device([&](Device *device) { | |||||
| if (!first_device) { | |||||
| LOG(ERROR) << "Multi-devices are not yet fully implemented, will evaluate shader on a " | |||||
| "single device."; | |||||
| return; | |||||
| } | |||||
| first_device = false; | |||||
| device_vector<KernelShaderEvalInput> input(device, "ShaderEval input", MEM_READ_ONLY); | |||||
| device_vector<float4> output(device, "ShaderEval output", MEM_READ_WRITE); | |||||
| /* Allocate and copy device buffers. */ | |||||
| DCHECK_EQ(input.device, device); | |||||
| DCHECK_EQ(output.device, device); | |||||
| DCHECK_LE(output.size(), input.size()); | |||||
| input.alloc(max_num_points); | |||||
| int num_points = fill_input(input); | |||||
| if (num_points == 0) { | |||||
| return; | |||||
| } | |||||
| input.copy_to_device(); | |||||
| output.alloc(num_points); | |||||
| output.zero_to_device(); | |||||
| /* Evaluate on CPU or GPU. */ | |||||
| success = (device->info.type == DEVICE_CPU) ? eval_cpu(device, type, input, output) : | |||||
| eval_gpu(device, type, input, output); | |||||
| /* Copy data back from device if not cancelled. */ | |||||
| if (success) { | |||||
| output.copy_from_device(0, 1, output.size()); | |||||
| read_output(output); | |||||
| } | |||||
| input.free(); | |||||
| output.free(); | |||||
| }); | |||||
| return success; | |||||
| } | |||||
| bool ShaderEval::eval_cpu(Device *device, | |||||
| const ShaderEvalType type, | |||||
| device_vector<KernelShaderEvalInput> &input, | |||||
| device_vector<float4> &output) | |||||
| { | |||||
| vector<CPUKernelThreadGlobals> kernel_thread_globals; | |||||
| device->get_cpu_kernel_thread_globals(kernel_thread_globals); | |||||
| /* Find required kernel function. */ | |||||
| const CPUKernels &kernels = *(device->get_cpu_kernels()); | |||||
| /* Simple parallel_for over all work items. */ | |||||
| const int64_t work_size = output.size(); | |||||
| KernelShaderEvalInput *input_data = input.data(); | |||||
| float4 *output_data = output.data(); | |||||
| bool success = true; | |||||
| tbb::task_arena local_arena(device->info.cpu_threads); | |||||
| local_arena.execute([&]() { | |||||
| tbb::parallel_for(int64_t(0), work_size, [&](int64_t work_index) { | |||||
| /* TODO: is this fast enough? */ | |||||
| if (progress_.get_cancel()) { | |||||
| success = false; | |||||
| return; | |||||
| } | |||||
| const int thread_index = tbb::this_task_arena::current_thread_index(); | |||||
| KernelGlobals *kg = &kernel_thread_globals[thread_index]; | |||||
| switch (type) { | |||||
| case SHADER_EVAL_DISPLACE: | |||||
| kernels.shader_eval_displace(kg, input_data, output_data, work_index); | |||||
| break; | |||||
| case SHADER_EVAL_BACKGROUND: | |||||
| kernels.shader_eval_background(kg, input_data, output_data, work_index); | |||||
| break; | |||||
| } | |||||
| }); | |||||
| }); | |||||
| return success; | |||||
| } | |||||
| bool ShaderEval::eval_gpu(Device *device, | |||||
| const ShaderEvalType type, | |||||
| device_vector<KernelShaderEvalInput> &input, | |||||
| device_vector<float4> &output) | |||||
| { | |||||
| /* Find required kernel function. */ | |||||
| DeviceKernel kernel; | |||||
| switch (type) { | |||||
| case SHADER_EVAL_DISPLACE: | |||||
| kernel = DEVICE_KERNEL_SHADER_EVAL_DISPLACE; | |||||
| break; | |||||
| case SHADER_EVAL_BACKGROUND: | |||||
| kernel = DEVICE_KERNEL_SHADER_EVAL_BACKGROUND; | |||||
| break; | |||||
| }; | |||||
| /* Create device queue. */ | |||||
| unique_ptr<DeviceQueue> queue = device->gpu_queue_create(); | |||||
| queue->init_execution(); | |||||
| /* Execute work on GPU in chunk, so we can cancel. | |||||
| * TODO : query appropriate size from device.*/ | |||||
| const int chunk_size = 65536; | |||||
| const int work_size = output.size(); | |||||
| void *d_input = (void *)input.device_pointer; | |||||
| void *d_output = (void *)output.device_pointer; | |||||
| for (int d_offset = 0; d_offset < work_size; d_offset += chunk_size) { | |||||
| int d_work_size = min(chunk_size, work_size - d_offset); | |||||
| void *args[] = {&d_input, &d_output, &d_offset, &d_work_size}; | |||||
| queue->enqueue(kernel, d_work_size, args); | |||||
| queue->synchronize(); | |||||
| if (progress_.get_cancel()) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| CCL_NAMESPACE_END | |||||