Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/device/gpu/kernel.h
| Show All 15 Lines | |||||
| /* Common GPU kernels. */ | /* Common GPU kernels. */ | ||||
| #include "kernel/device/gpu/parallel_active_index.h" | #include "kernel/device/gpu/parallel_active_index.h" | ||||
| #include "kernel/device/gpu/parallel_prefix_sum.h" | #include "kernel/device/gpu/parallel_prefix_sum.h" | ||||
| #include "kernel/device/gpu/parallel_sorted_index.h" | #include "kernel/device/gpu/parallel_sorted_index.h" | ||||
| #include "kernel/device/gpu/work_stealing.h" | #include "kernel/device/gpu/work_stealing.h" | ||||
| /* Include constant tables before entering Metal's context class scope (context_begin.h) */ | |||||
| #include "kernel/tables.h" | |||||
| #ifdef __KERNEL_METAL__ | #ifdef __KERNEL_METAL__ | ||||
| # include "kernel/device/metal/context_begin.h" | # include "kernel/device/metal/context_begin.h" | ||||
| #endif | #endif | ||||
| #include "kernel/integrator/state.h" | #include "kernel/integrator/state.h" | ||||
| #include "kernel/integrator/state_flow.h" | #include "kernel/integrator/state_flow.h" | ||||
| #include "kernel/integrator/state_util.h" | #include "kernel/integrator/state_util.h" | ||||
| ▲ Show 20 Lines • Show All 427 Lines • ▼ Show 20 Lines | if (x < sw && y < sh) { | ||||
| converged = ccl_gpu_kernel_call(kernel_adaptive_sampling_convergence_check( | converged = ccl_gpu_kernel_call(kernel_adaptive_sampling_convergence_check( | ||||
| nullptr, render_buffer, sx + x, sy + y, threshold, reset, offset, stride)); | nullptr, render_buffer, sx + x, sy + y, threshold, reset, offset, stride)); | ||||
| } | } | ||||
| /* NOTE: All threads specified in the mask must execute the intrinsic. */ | /* NOTE: All threads specified in the mask must execute the intrinsic. */ | ||||
| const auto num_active_pixels_mask = ccl_gpu_ballot(!converged); | const auto num_active_pixels_mask = ccl_gpu_ballot(!converged); | ||||
| const int lane_id = ccl_gpu_thread_idx_x % ccl_gpu_warp_size; | const int lane_id = ccl_gpu_thread_idx_x % ccl_gpu_warp_size; | ||||
| if (lane_id == 0) { | if (lane_id == 0) { | ||||
| atomic_fetch_and_add_uint32(num_active_pixels, ccl_gpu_popc(num_active_pixels_mask)); | atomic_fetch_and_add_uint32(num_active_pixels, popcount(num_active_pixels_mask)); | ||||
| } | } | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ||||
| ccl_gpu_kernel_signature(adaptive_sampling_filter_x, | ccl_gpu_kernel_signature(adaptive_sampling_filter_x, | ||||
| ccl_global float *render_buffer, | ccl_global float *render_buffer, | ||||
| int sx, | int sx, | ||||
| int sy, | int sy, | ||||
| ▲ Show 20 Lines • Show All 411 Lines • ▼ Show 20 Lines | ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ||||
| if (state < num_states) { | if (state < num_states) { | ||||
| can_split = ccl_gpu_kernel_call(kernel_shadow_catcher_path_can_split(nullptr, state)); | can_split = ccl_gpu_kernel_call(kernel_shadow_catcher_path_can_split(nullptr, state)); | ||||
| } | } | ||||
| /* NOTE: All threads specified in the mask must execute the intrinsic. */ | /* NOTE: All threads specified in the mask must execute the intrinsic. */ | ||||
| const auto can_split_mask = ccl_gpu_ballot(can_split); | const auto can_split_mask = ccl_gpu_ballot(can_split); | ||||
| const int lane_id = ccl_gpu_thread_idx_x % ccl_gpu_warp_size; | const int lane_id = ccl_gpu_thread_idx_x % ccl_gpu_warp_size; | ||||
| if (lane_id == 0) { | if (lane_id == 0) { | ||||
| atomic_fetch_and_add_uint32(num_possible_splits, ccl_gpu_popc(can_split_mask)); | atomic_fetch_and_add_uint32(num_possible_splits, popcount(can_split_mask)); | ||||
| } | } | ||||
| } | } | ||||