Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/integrator/path_trace_work_gpu.cpp
| Show First 20 Lines • Show All 174 Lines • ▼ Show 20 Lines | |||||
| { | { | ||||
| queue_->init_execution(); | queue_->init_execution(); | ||||
| /* Copy to device side struct in constant memory. */ | /* Copy to device side struct in constant memory. */ | ||||
| device_->const_copy_to( | device_->const_copy_to( | ||||
| "__integrator_state", &integrator_state_gpu_, sizeof(integrator_state_gpu_)); | "__integrator_state", &integrator_state_gpu_, sizeof(integrator_state_gpu_)); | ||||
| } | } | ||||
| void PathTraceWorkGPU::render_samples(int start_sample, int samples_num) | void PathTraceWorkGPU::render_samples(RenderStatistics &statistics, | ||||
| int start_sample, | |||||
| int samples_num) | |||||
| { | { | ||||
| /* Limit the number of states for the tile and rely on greedy scheduling of tiles. This allows | /* Limit the number of states for the tile and rely on greedy scheduling of tiles. This allows | ||||
| * adding more work (because tiles are smaller, there is a higher chance that more paths will | * adding more work (because tiles are smaller, there is a higher chance that more paths will | ||||
| * become busy after adding new tiles). This is especially important for the shadow catcher, which | * become busy after adding new tiles). This is especially important for the shadow catcher, which | ||||
| * schedules work in halves of the available number of paths. */ | * schedules work in halves of the available number of paths. */ | ||||
| work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8); | work_tile_scheduler_.set_max_num_path_states(max_num_paths_ / 8); | ||||
| work_tile_scheduler_.reset(effective_buffer_params_, start_sample, samples_num); | work_tile_scheduler_.reset(effective_buffer_params_, start_sample, samples_num); | ||||
| enqueue_reset(); | enqueue_reset(); | ||||
| int num_iterations = 0; | |||||
| uint64_t num_busy_accum = 0; | |||||
| /* TODO: set a hard limit in case of undetected kernel failures? */ | /* TODO: set a hard limit in case of undetected kernel failures? */ | ||||
| while (true) { | while (true) { | ||||
| /* Enqueue work from the scheduler, on start or when there are not enough | /* Enqueue work from the scheduler, on start or when there are not enough | ||||
| * paths to keep the device occupied. */ | * paths to keep the device occupied. */ | ||||
| bool finished; | bool finished; | ||||
| if (enqueue_work_tiles(finished)) { | if (enqueue_work_tiles(finished)) { | ||||
| /* Copy stats from the device. */ | /* Copy stats from the device. */ | ||||
| queue_->copy_from_device(integrator_queue_counter_); | queue_->copy_from_device(integrator_queue_counter_); | ||||
| Show All 20 Lines | if (enqueue_path_iteration()) { | ||||
| if (!queue_->synchronize()) { | if (!queue_->synchronize()) { | ||||
| break; /* Stop on error. */ | break; /* Stop on error. */ | ||||
| } | } | ||||
| } | } | ||||
| if (is_cancel_requested()) { | if (is_cancel_requested()) { | ||||
| break; | break; | ||||
| } | } | ||||
| num_busy_accum += get_num_active_paths(); | |||||
| ++num_iterations; | |||||
| } | } | ||||
| statistics.occupancy = static_cast<float>(num_busy_accum) / num_iterations / max_num_paths_; | |||||
| } | } | ||||
| DeviceKernel PathTraceWorkGPU::get_most_queued_kernel() const | DeviceKernel PathTraceWorkGPU::get_most_queued_kernel() const | ||||
| { | { | ||||
| const IntegratorQueueCounter *queue_counter = integrator_queue_counter_.data(); | const IntegratorQueueCounter *queue_counter = integrator_queue_counter_.data(); | ||||
| int max_num_queued = 0; | int max_num_queued = 0; | ||||
| DeviceKernel kernel = DEVICE_KERNEL_NUM; | DeviceKernel kernel = DEVICE_KERNEL_NUM; | ||||
| ▲ Show 20 Lines • Show All 684 Lines • Show Last 20 Lines | |||||