Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/integrator/path_trace.cpp
| Show All 24 Lines | |||||
| #include "util/util_algorithm.h" | #include "util/util_algorithm.h" | ||||
| #include "util/util_logging.h" | #include "util/util_logging.h" | ||||
| #include "util/util_progress.h" | #include "util/util_progress.h" | ||||
| #include "util/util_tbb.h" | #include "util/util_tbb.h" | ||||
| #include "util/util_time.h" | #include "util/util_time.h" | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
| namespace { | |||||
| class TempCPURenderBuffers { | |||||
| public: | |||||
| /* `device_template` is used to access stats and profiler. */ | |||||
| explicit TempCPURenderBuffers(Device *device_template) | |||||
| { | |||||
| vector<DeviceInfo> cpu_devices; | |||||
| device_cpu_info(cpu_devices); | |||||
| device.reset( | |||||
| device_cpu_create(cpu_devices[0], device_template->stats, device_template->profiler)); | |||||
| buffers = make_unique<RenderBuffers>(device.get()); | |||||
| } | |||||
| unique_ptr<Device> device; | |||||
| unique_ptr<RenderBuffers> buffers; | |||||
| }; | |||||
| } // namespace | |||||
| PathTrace::PathTrace(Device *device, DeviceScene *device_scene, RenderScheduler &render_scheduler) | PathTrace::PathTrace(Device *device, DeviceScene *device_scene, RenderScheduler &render_scheduler) | ||||
| : device_(device), render_scheduler_(render_scheduler) | : device_(device), render_scheduler_(render_scheduler) | ||||
| { | { | ||||
| DCHECK_NE(device_, nullptr); | DCHECK_NE(device_, nullptr); | ||||
| /* Create path tracing work in advance, so that it can be reused by incremental sampling as much | /* Create path tracing work in advance, so that it can be reused by incremental sampling as much | ||||
| * as possible. */ | * as possible. */ | ||||
| device_->foreach_device([&](Device *path_trace_device) { | device_->foreach_device([&](Device *path_trace_device) { | ||||
| path_trace_works_.emplace_back( | path_trace_works_.emplace_back( | ||||
| PathTraceWork::create(path_trace_device, device_scene, &render_cancel_.is_requested)); | PathTraceWork::create(path_trace_device, device_scene, &render_cancel_.is_requested)); | ||||
| }); | }); | ||||
| work_balance_infos_.resize(path_trace_works_.size()); | |||||
| work_balance_do_initial(work_balance_infos_); | |||||
| } | } | ||||
| void PathTrace::load_kernels() | void PathTrace::load_kernels() | ||||
| { | { | ||||
| if (denoiser_) { | if (denoiser_) { | ||||
| denoiser_->load_kernels(progress_); | denoiser_->load_kernels(progress_); | ||||
| } | } | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | void PathTrace::render_pipeline(RenderWork render_work) | ||||
| } | } | ||||
| denoise(render_work); | denoise(render_work); | ||||
| if (is_cancel_requested()) { | if (is_cancel_requested()) { | ||||
| return; | return; | ||||
| } | } | ||||
| update_display(render_work); | update_display(render_work); | ||||
| rebalance(render_work); | |||||
| progress_update_if_needed(); | progress_update_if_needed(); | ||||
| if (render_scheduler_.done()) { | if (render_scheduler_.done()) { | ||||
| buffer_write(); | buffer_write(); | ||||
| } | } | ||||
| } | } | ||||
| void PathTrace::render_init_kernel_execution() | void PathTrace::render_init_kernel_execution() | ||||
| { | { | ||||
| for (auto &&path_trace_work : path_trace_works_) { | for (auto &&path_trace_work : path_trace_works_) { | ||||
| path_trace_work->init_execution(); | path_trace_work->init_execution(); | ||||
| } | } | ||||
| } | } | ||||
| /* TODO(sergey): Look into `std::function` rather than using a template. Should not be a | /* TODO(sergey): Look into `std::function` rather than using a template. Should not be a | ||||
| * measurable performance impact at runtime, but will make compilation faster and binary somewhat | * measurable performance impact at runtime, but will make compilation faster and binary somewhat | ||||
| * smaller. */ | * smaller. */ | ||||
| template<typename Callback> | template<typename Callback> | ||||
| static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works, | static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works, | ||||
| const vector<WorkBalanceInfo> &work_balance_infos, | |||||
| const BufferParams &buffer_params, | const BufferParams &buffer_params, | ||||
| const Callback &callback) | const Callback &callback) | ||||
| { | { | ||||
| const int num_works = path_trace_works.size(); | const int num_works = path_trace_works.size(); | ||||
| const int height = buffer_params.height; | const int height = buffer_params.height; | ||||
| int current_y = 0; | int current_y = 0; | ||||
| for (int i = 0; i < num_works; ++i) { | for (int i = 0; i < num_works; ++i) { | ||||
| /* TODO(sergey): Support adaptive weight based on an observed device performance. */ | const double weight = work_balance_infos[i].weight; | ||||
| const float weight = 1.0f / num_works; | |||||
| const int slice_height = max(lround(height * weight), 1); | const int slice_height = max(lround(height * weight), 1); | ||||
| /* Disallow negative values to deal with situations when there are more compute devices than | /* Disallow negative values to deal with situations when there are more compute devices than | ||||
| * scanlines. */ | * scanlines. */ | ||||
| const int remaining_height = max(0, height - current_y); | const int remaining_height = max(0, height - current_y); | ||||
| BufferParams slide_params = buffer_params; | BufferParams slide_params = buffer_params; | ||||
| slide_params.full_y = buffer_params.full_y + current_y; | slide_params.full_y = buffer_params.full_y + current_y; | ||||
| Show All 10 Lines | for (int i = 0; i < num_works; ++i) { | ||||
| current_y += slide_params.height; | current_y += slide_params.height; | ||||
| } | } | ||||
| } | } | ||||
| void PathTrace::update_allocated_work_buffer_params() | void PathTrace::update_allocated_work_buffer_params() | ||||
| { | { | ||||
| foreach_sliced_buffer_params(path_trace_works_, | foreach_sliced_buffer_params(path_trace_works_, | ||||
| work_balance_infos_, | |||||
| big_tile_params_, | big_tile_params_, | ||||
| [](PathTraceWork *path_trace_work, const BufferParams ¶ms) { | [](PathTraceWork *path_trace_work, const BufferParams ¶ms) { | ||||
| RenderBuffers *buffers = path_trace_work->get_render_buffers(); | RenderBuffers *buffers = path_trace_work->get_render_buffers(); | ||||
| buffers->reset(params); | buffers->reset(params); | ||||
| }); | }); | ||||
| } | } | ||||
| static BufferParams scale_buffer_params(const BufferParams ¶ms, int resolution_divider) | static BufferParams scale_buffer_params(const BufferParams ¶ms, int resolution_divider) | ||||
| Show All 15 Lines | |||||
| void PathTrace::update_effective_work_buffer_params(const RenderWork &render_work) | void PathTrace::update_effective_work_buffer_params(const RenderWork &render_work) | ||||
| { | { | ||||
| const int resolution_divider = render_work.resolution_divider; | const int resolution_divider = render_work.resolution_divider; | ||||
| const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_, | const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_, | ||||
| resolution_divider); | resolution_divider); | ||||
| foreach_sliced_buffer_params(path_trace_works_, | foreach_sliced_buffer_params(path_trace_works_, | ||||
| work_balance_infos_, | |||||
| scaled_big_tile_params, | scaled_big_tile_params, | ||||
| [&](PathTraceWork *path_trace_work, const BufferParams params) { | [&](PathTraceWork *path_trace_work, const BufferParams params) { | ||||
| path_trace_work->set_effective_buffer_params( | path_trace_work->set_effective_buffer_params( | ||||
| scaled_big_tile_params, params); | scaled_big_tile_params, params); | ||||
| }); | }); | ||||
| render_state_.effective_big_tile_params = scaled_big_tile_params; | render_state_.effective_big_tile_params = scaled_big_tile_params; | ||||
| } | } | ||||
| Show All 34 Lines | if (!render_work.path_trace.num_samples) { | ||||
| return; | return; | ||||
| } | } | ||||
| VLOG(3) << "Will path trace " << render_work.path_trace.num_samples | VLOG(3) << "Will path trace " << render_work.path_trace.num_samples | ||||
| << " samples at the resolution divider " << render_work.resolution_divider; | << " samples at the resolution divider " << render_work.resolution_divider; | ||||
| const double start_time = time_dt(); | const double start_time = time_dt(); | ||||
| tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { | const int num_works = path_trace_works_.size(); | ||||
| tbb::parallel_for(0, num_works, [&](int i) { | |||||
| const double work_start_time = time_dt(); | |||||
| PathTraceWork *path_trace_work = path_trace_works_[i].get(); | |||||
| path_trace_work->render_samples(render_work.path_trace.start_sample, | path_trace_work->render_samples(render_work.path_trace.start_sample, | ||||
| render_work.path_trace.num_samples); | render_work.path_trace.num_samples); | ||||
| work_balance_infos_[i].time_spent += time_dt() - work_start_time; | |||||
| }); | }); | ||||
| render_scheduler_.report_path_trace_time( | render_scheduler_.report_path_trace_time( | ||||
| render_work, time_dt() - start_time, is_cancel_requested()); | render_work, time_dt() - start_time, is_cancel_requested()); | ||||
| } | } | ||||
| void PathTrace::adaptive_sample(RenderWork &render_work) | void PathTrace::adaptive_sample(RenderWork &render_work) | ||||
| { | { | ||||
| ▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | void PathTrace::denoise(const RenderWork &render_work) | ||||
| } | } | ||||
| VLOG(3) << "Perform denoising work."; | VLOG(3) << "Perform denoising work."; | ||||
| const double start_time = time_dt(); | const double start_time = time_dt(); | ||||
| RenderBuffers *buffer_to_denoise = nullptr; | RenderBuffers *buffer_to_denoise = nullptr; | ||||
| unique_ptr<Device> big_tile_device; | unique_ptr<TempCPURenderBuffers> big_tile_cpu_buffers; | ||||
| unique_ptr<RenderBuffers> big_tile_render_buffers; | |||||
| if (path_trace_works_.size() == 1) { | if (path_trace_works_.size() == 1) { | ||||
| buffer_to_denoise = path_trace_works_.front()->get_render_buffers(); | buffer_to_denoise = path_trace_works_.front()->get_render_buffers(); | ||||
| } | } | ||||
| else { | else { | ||||
| /* TODO(sergey): Try to reuse the buffer as much as possible. */ | /* TODO(sergey): Try to reuse the buffer as much as possible. */ | ||||
| /* TODO(sergey): Split the functionality into a separate function. */ | |||||
| /* Used to access stats and profiler. */ | |||||
| Device *device_template = path_trace_works_.front()->get_device(); | |||||
| /* TODO(sergey): Share same device as what will be used by the denoiser. */ | /* TODO(sergey): Share same device as what will be used by the denoiser. */ | ||||
| vector<DeviceInfo> cpu_devices; | |||||
| device_cpu_info(cpu_devices); | |||||
| big_tile_device.reset( | |||||
| device_cpu_create(cpu_devices[0], device_template->stats, device_template->profiler)); | |||||
| big_tile_render_buffers = make_unique<RenderBuffers>(big_tile_device.get()); | big_tile_cpu_buffers = make_unique<TempCPURenderBuffers>(device_); | ||||
| big_tile_render_buffers->reset(render_state_.effective_big_tile_params); | big_tile_cpu_buffers->buffers->reset(render_state_.effective_big_tile_params); | ||||
| buffer_to_denoise = big_tile_render_buffers.get(); | buffer_to_denoise = big_tile_cpu_buffers->buffers.get(); | ||||
| copy_to_render_buffers(big_tile_render_buffers.get()); | copy_to_render_buffers(big_tile_cpu_buffers->buffers.get()); | ||||
| } | } | ||||
| denoiser_->denoise_buffer( | denoiser_->denoise_buffer( | ||||
| render_state_.effective_big_tile_params, buffer_to_denoise, get_num_samples_in_buffer()); | render_state_.effective_big_tile_params, buffer_to_denoise, get_num_samples_in_buffer()); | ||||
| if (big_tile_render_buffers) { | if (big_tile_cpu_buffers) { | ||||
| copy_from_render_buffers(big_tile_render_buffers.get()); | copy_from_render_buffers(big_tile_cpu_buffers->buffers.get()); | ||||
| } | } | ||||
| render_scheduler_.report_denoise_time(render_work, time_dt() - start_time); | render_scheduler_.report_denoise_time(render_work, time_dt() - start_time); | ||||
| render_state_.has_denoised_result_ = true; | render_state_.has_denoised_result_ = true; | ||||
| } | } | ||||
| void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display) | void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display) | ||||
| ▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | for (auto &&path_trace_work : path_trace_works_) { | ||||
| path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples); | path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples); | ||||
| } | } | ||||
| gpu_display_->update_end(); | gpu_display_->update_end(); | ||||
| render_scheduler_.report_display_update_time(render_work, time_dt() - start_time); | render_scheduler_.report_display_update_time(render_work, time_dt() - start_time); | ||||
| } | } | ||||
| void PathTrace::rebalance(const RenderWork &render_work) | |||||
| { | |||||
| static const int kLogLevel = 3; | |||||
| scoped_timer timer; | |||||
| const int num_works = path_trace_works_.size(); | |||||
| if (!render_work.rebalance) { | |||||
| return; | |||||
| } | |||||
| if (num_works == 1) { | |||||
| VLOG(3) << "Ignoring rebalance work due to single device render."; | |||||
brecht: `VLOG(kLogLevel)`? | |||||
| return; | |||||
| } | |||||
| if (VLOG_IS_ON(kLogLevel)) { | |||||
| VLOG(kLogLevel) << "Perform rebalance work."; | |||||
| VLOG(kLogLevel) << "Per-device path tracing time (seconds):"; | |||||
| for (int i = 0; i < num_works; ++i) { | |||||
| VLOG(kLogLevel) << path_trace_works_[i]->get_device()->info.description << ": " | |||||
| << work_balance_infos_[i].time_spent; | |||||
| } | |||||
| } | |||||
| const bool did_rebalance = work_balance_do_rebalance(work_balance_infos_); | |||||
| if (VLOG_IS_ON(kLogLevel)) { | |||||
| VLOG(kLogLevel) << "Calculated per-device weights for works:"; | |||||
| for (int i = 0; i < num_works; ++i) { | |||||
| LOG(INFO) << path_trace_works_[i]->get_device()->info.description << ": " | |||||
| << work_balance_infos_[i].weight; | |||||
| } | |||||
| } | |||||
| if (!did_rebalance) { | |||||
| VLOG(3) << "Balance in path trace works did not change."; | |||||
brechtUnsubmitted Not Done Inline ActionsVLOG(kLogLevel)? brecht: `VLOG(kLogLevel)`? | |||||
| return; | |||||
| } | |||||
| TempCPURenderBuffers big_tile_cpu_buffers(device_); | |||||
| big_tile_cpu_buffers.buffers->reset(render_state_.effective_big_tile_params); | |||||
| copy_to_render_buffers(big_tile_cpu_buffers.buffers.get()); | |||||
| render_state_.need_reset_params = true; | |||||
| update_work_buffer_params_if_needed(render_work); | |||||
| copy_from_render_buffers(big_tile_cpu_buffers.buffers.get()); | |||||
| VLOG(kLogLevel) << "Rebalance time (seconds): " << timer.get_time(); | |||||
| } | |||||
| void PathTrace::cancel() | void PathTrace::cancel() | ||||
| { | { | ||||
| thread_scoped_lock lock(render_cancel_.mutex); | thread_scoped_lock lock(render_cancel_.mutex); | ||||
| render_cancel_.is_requested = true; | render_cancel_.is_requested = true; | ||||
| while (render_cancel_.is_rendering) { | while (render_cancel_.is_rendering) { | ||||
| render_cancel_.condition.wait(lock); | render_cancel_.condition.wait(lock); | ||||
| ▲ Show 20 Lines • Show All 264 Lines • Show Last 20 Lines | |||||
VLOG(kLogLevel)?