Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_split_kernel.cpp
| Show First 20 Lines • Show All 171 Lines • ▼ Show 20 Lines | if (!kernel_data_initialized) { | ||||
| global_size[0] = round_up(gsize[0], local_size[0]); | global_size[0] = round_up(gsize[0], local_size[0]); | ||||
| global_size[1] = round_up(gsize[1], local_size[1]); | global_size[1] = round_up(gsize[1], local_size[1]); | ||||
| int num_global_elements = global_size[0] * global_size[1]; | int num_global_elements = global_size[0] * global_size[1]; | ||||
| assert(num_global_elements % WORK_POOL_SIZE == 0); | assert(num_global_elements % WORK_POOL_SIZE == 0); | ||||
| /* Calculate max groups */ | /* Calculate max groups */ | ||||
| /* Denotes the maximum work groups possible w.r.t. current requested tile size. */ | /* Denotes the maximum work groups possible w.r.t. current requested tile size. */ | ||||
| unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : | unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : | ||||
| WORK_POOL_SIZE_GPU; | WORK_POOL_SIZE_GPU; | ||||
| unsigned int max_work_groups = num_global_elements / work_pool_size + 1; | unsigned int max_work_groups = num_global_elements / work_pool_size + 1; | ||||
| /* Allocate work_pool_wgs memory. */ | /* Allocate work_pool_wgs memory. */ | ||||
| work_pool_wgs.alloc_to_device(max_work_groups); | work_pool_wgs.alloc_to_device(max_work_groups); | ||||
| queue_index.alloc_to_device(NUM_QUEUES); | queue_index.alloc_to_device(NUM_QUEUES); | ||||
| use_queues_flag.alloc_to_device(1); | use_queues_flag.alloc_to_device(1); | ||||
| split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); | split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements)); | ||||
| ray_state.alloc(num_global_elements); | ray_state.alloc(num_global_elements); | ||||
| } | } | ||||
| /* Number of elements in the global state buffer */ | /* Number of elements in the global state buffer */ | ||||
| int num_global_elements = global_size[0] * global_size[1]; | int num_global_elements = global_size[0] * global_size[1]; | ||||
| #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ | #define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \ | ||||
| if (device->have_error()) { \ | if (device->have_error()) { \ | ||||
| return false; \ | return false; \ | ||||
| } \ | } \ | ||||
| if (!kernel_##name->enqueue( \ | if (!kernel_##name->enqueue( \ | ||||
| KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \ | KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \ | ||||
| return false; \ | return false; \ | ||||
| } | } | ||||
| tile.sample = tile.start_sample; | tile.get_sample() = tile.get_start_sample(); | ||||
| /* for exponential increase between tile updates */ | /* for exponential increase between tile updates */ | ||||
| int time_multiplier = 1; | int time_multiplier = 1; | ||||
| while (tile.sample < tile.start_sample + tile.num_samples) { | while (tile.get_sample() < tile.get_start_sample() + tile.get_num_samples()) { | ||||
| /* to keep track of how long it takes to run a number of samples */ | /* to keep track of how long it takes to run a number of samples */ | ||||
| double start_time = time_dt(); | double start_time = time_dt(); | ||||
| /* initial guess to start rolling average */ | /* initial guess to start rolling average */ | ||||
| const int initial_num_samples = 1; | const int initial_num_samples = 1; | ||||
| /* approx number of samples per second */ | /* approx number of samples per second */ | ||||
| const int samples_per_second = (avg_time_per_sample > 0.0) ? | const int samples_per_second = (avg_time_per_sample > 0.0) ? | ||||
| int(double(time_multiplier) / avg_time_per_sample) + 1 : | int(double(time_multiplier) / avg_time_per_sample) + 1 : | ||||
| initial_num_samples; | initial_num_samples; | ||||
| RenderTile subtile = tile; | RenderTile subtile = tile; | ||||
| subtile.start_sample = tile.sample; | subtile.get_start_sample() = tile.get_sample(); | ||||
| subtile.num_samples = samples_per_second; | subtile.get_num_samples() = samples_per_second; | ||||
| if (task.adaptive_sampling.use) { | if (task.adaptive_sampling.use) { | ||||
| subtile.num_samples = task.adaptive_sampling.align_dynamic_samples(subtile.start_sample, | subtile.get_num_samples() = task.adaptive_sampling.align_dynamic_samples( | ||||
| subtile.num_samples); | subtile.get_start_sample(), subtile.get_num_samples()); | ||||
| } | } | ||||
| /* Don't go beyond requested number of samples. */ | /* Don't go beyond requested number of samples. */ | ||||
| subtile.num_samples = min(subtile.num_samples, | subtile.get_num_samples() = min(subtile.get_num_samples(), | ||||
| tile.start_sample + tile.num_samples - tile.sample); | tile.get_start_sample() + tile.get_num_samples() - | ||||
| tile.get_sample()); | |||||
| if (device->have_error()) { | if (device->have_error()) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| /* reset state memory here as global size for data_init | /* reset state memory here as global size for data_init | ||||
| * kernel might not be large enough to do in kernel | * kernel might not be large enough to do in kernel | ||||
| */ | */ | ||||
| ▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | while (activeRaysAvailable) { | ||||
| } | } | ||||
| } | } | ||||
| if (time_dt() > cancel_time) { | if (time_dt() > cancel_time) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| } | } | ||||
| int filter_sample = tile.sample + subtile.num_samples - 1; | int filter_sample = tile.get_sample() + subtile.get_num_samples() - 1; | ||||
| if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) { | if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) { | ||||
| size_t buffer_size[2]; | size_t buffer_size[2]; | ||||
| buffer_size[0] = round_up(tile.w, local_size[0]); | buffer_size[0] = round_up(tile.get_w(), local_size[0]); | ||||
| buffer_size[1] = round_up(tile.h, local_size[1]); | buffer_size[1] = round_up(tile.get_h(), local_size[1]); | ||||
| kernel_adaptive_stopping->enqueue( | kernel_adaptive_stopping->enqueue( | ||||
| KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | ||||
| buffer_size[0] = round_up(tile.h, local_size[0]); | buffer_size[0] = round_up(tile.get_h(), local_size[0]); | ||||
| buffer_size[1] = round_up(1, local_size[1]); | buffer_size[1] = round_up(1, local_size[1]); | ||||
| kernel_adaptive_filter_x->enqueue( | kernel_adaptive_filter_x->enqueue( | ||||
| KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | ||||
| buffer_size[0] = round_up(tile.w, local_size[0]); | buffer_size[0] = round_up(tile.get_w(), local_size[0]); | ||||
| buffer_size[1] = round_up(1, local_size[1]); | buffer_size[1] = round_up(1, local_size[1]); | ||||
| kernel_adaptive_filter_y->enqueue( | kernel_adaptive_filter_y->enqueue( | ||||
| KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | ||||
| } | } | ||||
| double time_per_sample = ((time_dt() - start_time) / subtile.num_samples); | double time_per_sample = ((time_dt() - start_time) / subtile.get_num_samples()); | ||||
| if (avg_time_per_sample == 0.0) { | if (avg_time_per_sample == 0.0) { | ||||
| /* start rolling average */ | /* start rolling average */ | ||||
| avg_time_per_sample = time_per_sample; | avg_time_per_sample = time_per_sample; | ||||
| } | } | ||||
| else { | else { | ||||
| avg_time_per_sample = alpha * time_per_sample + (1.0 - alpha) * avg_time_per_sample; | avg_time_per_sample = alpha * time_per_sample + (1.0 - alpha) * avg_time_per_sample; | ||||
| } | } | ||||
| #undef ENQUEUE_SPLIT_KERNEL | #undef ENQUEUE_SPLIT_KERNEL | ||||
| tile.sample += subtile.num_samples; | tile.get_sample() += subtile.get_num_samples(); | ||||
| task.update_progress(&tile, tile.w * tile.h * subtile.num_samples); | task.update_progress(&tile, tile.get_w() * tile.get_h() * subtile.get_num_samples()); | ||||
| time_multiplier = min(time_multiplier << 1, 10); | time_multiplier = min(time_multiplier << 1, 10); | ||||
| if (task.get_cancel()) { | if (task.get_cancel()) { | ||||
| return true; | return true; | ||||
| } | } | ||||
| } | } | ||||
| if (task.adaptive_sampling.use) { | if (task.adaptive_sampling.use) { | ||||
| /* Reset the start samples. */ | /* Reset the start samples. */ | ||||
| RenderTile subtile = tile; | RenderTile subtile = tile; | ||||
| subtile.start_sample = tile.start_sample; | subtile.get_start_sample() = tile.get_start_sample(); | ||||
| subtile.num_samples = tile.sample - tile.start_sample; | subtile.get_num_samples() = tile.get_sample() - tile.get_start_sample(); | ||||
| enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), | enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), | ||||
| subtile, | subtile, | ||||
| num_global_elements, | num_global_elements, | ||||
| kgbuffer, | kgbuffer, | ||||
| kernel_data, | kernel_data, | ||||
| split_data, | split_data, | ||||
| ray_state, | ray_state, | ||||
| queue_index, | queue_index, | ||||
| use_queues_flag, | use_queues_flag, | ||||
| work_pool_wgs); | work_pool_wgs); | ||||
| size_t buffer_size[2]; | size_t buffer_size[2]; | ||||
| buffer_size[0] = round_up(tile.w, local_size[0]); | buffer_size[0] = round_up(tile.get_w(), local_size[0]); | ||||
| buffer_size[1] = round_up(tile.h, local_size[1]); | buffer_size[1] = round_up(tile.get_h(), local_size[1]); | ||||
| kernel_adaptive_adjust_samples->enqueue( | kernel_adaptive_adjust_samples->enqueue( | ||||
| KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | ||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| CCL_NAMESPACE_END | CCL_NAMESPACE_END | ||||