Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/integrator/path_trace_work_gpu.cpp
| Show All 17 Lines | |||||
| #include "integrator/path_trace_display.h" | #include "integrator/path_trace_display.h" | ||||
| #include "device/device.h" | #include "device/device.h" | ||||
| #include "integrator/pass_accessor_gpu.h" | #include "integrator/pass_accessor_gpu.h" | ||||
| #include "render/buffers.h" | #include "render/buffers.h" | ||||
| #include "render/scene.h" | #include "render/scene.h" | ||||
| #include "util/util_logging.h" | #include "util/util_logging.h" | ||||
| #include "util/util_string.h" | |||||
| #include "util/util_tbb.h" | #include "util/util_tbb.h" | ||||
| #include "util/util_time.h" | #include "util/util_time.h" | ||||
| #include "kernel/kernel_types.h" | #include "kernel/kernel_types.h" | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
/* Estimate the size in bytes of a single integrator state.
 *
 * The integrator state template header is included below with the KERNEL_STRUCT_* macros
 * redefined so that every member it declares adds `sizeof(type)` to a running total. The total
 * is returned; the caller visible in this file passes it to `num_concurrent_states()`.
 *
 * The revision that takes `device_scene` reads `device_scene->data.volume_stack_size` through
 * the KERNEL_STRUCT_VOLUME_STACK_SIZE macro, so the pointer must be valid there.
 * NOTE(review): presumably the template uses that macro as the volume stack array size, which
 * makes the estimate scene-dependent - confirm against the template header. */
| static size_t estimate_single_state_size() | static size_t estimate_single_state_size(DeviceScene *device_scene) | ||||
| { | { | ||||
| size_t state_size = 0; | size_t state_size = 0; | ||||
/* Each struct of the template opens a loop over `array_index`. Members are counted with
 * `sizeof(type)` only; the `feature` argument is ignored, so this estimate does not filter
 * members by kernel feature. */
| #define KERNEL_STRUCT_BEGIN(name) for (int array_index = 0;; array_index++) { | #define KERNEL_STRUCT_BEGIN(name) for (int array_index = 0;; array_index++) { | ||||
| #define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type); | #define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type); | ||||
| #define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type); | #define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type); | ||||
| #define KERNEL_STRUCT_END(name) \ | #define KERNEL_STRUCT_END(name) \ | ||||
| break; \ | break; \ | ||||
| } | } | ||||
/* Array structs leave the loop once `array_index` reaches `gpu_array_size - 1`, i.e. the
 * enclosed member statements run `gpu_array_size` times. `cpu_array_size` is unused here. */
| #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ | #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ | ||||
| if (array_index == gpu_array_size - 1) { \ | if (array_index == gpu_array_size - 1) { \ | ||||
| break; \ | break; \ | ||||
| } \ | } \ | ||||
| } | } | ||||
/* Expose the scene's volume stack size to the template under the name it expects. */
| #define KERNEL_STRUCT_VOLUME_STACK_SIZE (device_scene->data.volume_stack_size) | |||||
/* Expand the template with the macros above; this is where `state_size` is accumulated. */
| #include "kernel/integrator/integrator_state_template.h" | #include "kernel/integrator/integrator_state_template.h" | ||||
/* Undefine the helper macros so that later expansions of the template can redefine them. */
| #undef KERNEL_STRUCT_BEGIN | #undef KERNEL_STRUCT_BEGIN | ||||
| #undef KERNEL_STRUCT_MEMBER | #undef KERNEL_STRUCT_MEMBER | ||||
| #undef KERNEL_STRUCT_ARRAY_MEMBER | #undef KERNEL_STRUCT_ARRAY_MEMBER | ||||
| #undef KERNEL_STRUCT_END | #undef KERNEL_STRUCT_END | ||||
| #undef KERNEL_STRUCT_END_ARRAY | #undef KERNEL_STRUCT_END_ARRAY | ||||
| #undef KERNEL_STRUCT_VOLUME_STACK_SIZE | |||||
| return state_size; | return state_size; | ||||
| } | } | ||||
| PathTraceWorkGPU::PathTraceWorkGPU(Device *device, | PathTraceWorkGPU::PathTraceWorkGPU(Device *device, | ||||
| Film *film, | Film *film, | ||||
| DeviceScene *device_scene, | DeviceScene *device_scene, | ||||
| bool *cancel_requested_flag) | bool *cancel_requested_flag) | ||||
| : PathTraceWork(device, film, device_scene, cancel_requested_flag), | : PathTraceWork(device, film, device_scene, cancel_requested_flag), | ||||
| queue_(device->gpu_queue_create()), | queue_(device->gpu_queue_create()), | ||||
| integrator_state_soa_kernel_features_(0), | integrator_state_soa_kernel_features_(0), | ||||
| integrator_queue_counter_(device, "integrator_queue_counter", MEM_READ_WRITE), | integrator_queue_counter_(device, "integrator_queue_counter", MEM_READ_WRITE), | ||||
| integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE), | integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE), | ||||
| integrator_shader_raytrace_sort_counter_( | integrator_shader_raytrace_sort_counter_( | ||||
| device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE), | device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE), | ||||
| integrator_next_shadow_catcher_path_index_( | integrator_next_shadow_catcher_path_index_( | ||||
| device, "integrator_next_shadow_catcher_path_index", MEM_READ_WRITE), | device, "integrator_next_shadow_catcher_path_index", MEM_READ_WRITE), | ||||
| queued_paths_(device, "queued_paths", MEM_READ_WRITE), | queued_paths_(device, "queued_paths", MEM_READ_WRITE), | ||||
| num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE), | num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE), | ||||
| work_tiles_(device, "work_tiles", MEM_READ_WRITE), | work_tiles_(device, "work_tiles", MEM_READ_WRITE), | ||||
| display_rgba_half_(device, "display buffer half", MEM_READ_WRITE), | display_rgba_half_(device, "display buffer half", MEM_READ_WRITE), | ||||
| max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())), | max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size(device_scene))), | ||||
| min_num_active_paths_(queue_->num_concurrent_busy_states()), | min_num_active_paths_(queue_->num_concurrent_busy_states()), | ||||
| max_active_path_index_(0) | max_active_path_index_(0) | ||||
| { | { | ||||
| memset(&integrator_state_gpu_, 0, sizeof(integrator_state_gpu_)); | memset(&integrator_state_gpu_, 0, sizeof(integrator_state_gpu_)); | ||||
| /* Limit number of active paths to the half of the overall state. This is due to the logic in the | /* Limit number of active paths to the half of the overall state. This is due to the logic in the | ||||
| * path compaction which relies on the fact that regeneration does not happen sooner than half of | * path compaction which relies on the fact that regeneration does not happen sooner than half of | ||||
| * the states are available again. */ | * the states are available again. */ | ||||
| Show All 36 Lines | |||||
| #define KERNEL_STRUCT_END(name) \ | #define KERNEL_STRUCT_END(name) \ | ||||
| break; \ | break; \ | ||||
| } | } | ||||
| #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ | #define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ | ||||
| if (array_index == gpu_array_size - 1) { \ | if (array_index == gpu_array_size - 1) { \ | ||||
| break; \ | break; \ | ||||
| } \ | } \ | ||||
| } | } | ||||
| #define KERNEL_STRUCT_VOLUME_STACK_SIZE (device_scene_->data.volume_stack_size) | |||||
| #include "kernel/integrator/integrator_state_template.h" | #include "kernel/integrator/integrator_state_template.h" | ||||
| #undef KERNEL_STRUCT_BEGIN | #undef KERNEL_STRUCT_BEGIN | ||||
| #undef KERNEL_STRUCT_MEMBER | #undef KERNEL_STRUCT_MEMBER | ||||
| #undef KERNEL_STRUCT_ARRAY_MEMBER | #undef KERNEL_STRUCT_ARRAY_MEMBER | ||||
| #undef KERNEL_STRUCT_END | #undef KERNEL_STRUCT_END | ||||
| #undef KERNEL_STRUCT_END_ARRAY | #undef KERNEL_STRUCT_END_ARRAY | ||||
| #undef KERNEL_STRUCT_VOLUME_STACK_SIZE | |||||
| if (VLOG_IS_ON(3)) { | |||||
| size_t total_soa_size = 0; | |||||
| for (auto &&soa_memory : integrator_state_soa_) { | |||||
| total_soa_size += soa_memory->memory_size(); | |||||
| } | |||||
| VLOG(3) << "GPU SoA state size: " << string_human_readable_size(total_soa_size); | |||||
| } | |||||
| } | } | ||||
| void PathTraceWorkGPU::alloc_integrator_queue() | void PathTraceWorkGPU::alloc_integrator_queue() | ||||
| { | { | ||||
| if (integrator_queue_counter_.size() == 0) { | if (integrator_queue_counter_.size() == 0) { | ||||
| integrator_queue_counter_.alloc(1); | integrator_queue_counter_.alloc(1); | ||||
| integrator_queue_counter_.zero_to_device(); | integrator_queue_counter_.zero_to_device(); | ||||
| integrator_queue_counter_.copy_from_device(); | integrator_queue_counter_.copy_from_device(); | ||||
| ▲ Show 20 Lines • Show All 816 Lines • Show Last 20 Lines | |||||