Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/kernels/cuda/kernel_split.cu
| Show First 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | |||||
| #define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ | #define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ | ||||
| extern "C" __global__ void \ | extern "C" __global__ void \ | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | ||||
| kernel_cuda_##name() \ | kernel_cuda_##name() \ | ||||
| { \ | { \ | ||||
| kernel_##name(NULL); \ | kernel_##name(NULL); \ | ||||
| } | } | ||||
| #define DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS(name) \ | |||||
maiself: Dont need this I think. | |||||
| extern "C" __global__ void \ | |||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | |||||
| kernel_cuda_##name() \ | |||||
| { \ | |||||
| ccl_local unsigned int local_queue_atomics; \ | |||||
| kernel_##name(NULL, &local_queue_atomics); \ | |||||
| } | |||||
| #define DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS_2(name) \ | |||||
| extern "C" __global__ void \ | |||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | |||||
| kernel_cuda_##name() \ | |||||
| { \ | |||||
| ccl_local unsigned int local_queue_atomics_0; \ | |||||
| ccl_local unsigned int local_queue_atomics_1; \ | |||||
| kernel_##name(NULL, &local_queue_atomics_0, &local_queue_atomics_1); \ | |||||
| } | |||||
| #define DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS_2V(name) \ | |||||
| extern "C" __global__ void \ | |||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | |||||
| kernel_cuda_##name() \ | |||||
| { \ | |||||
| ccl_local unsigned int local_queue_atomics[2]; \ | |||||
| kernel_##name(NULL, local_queue_atomics); \ | |||||
| } | |||||
| DEFINE_SPLIT_KERNEL_FUNCTION(path_init) | DEFINE_SPLIT_KERNEL_FUNCTION(path_init) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) | DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) | DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS_2V(queue_enqueue) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS(background_buffer_update); | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS(shader_eval) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS_2(holdout_emission_blurring_pathtermination_ao) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS(direct_lighting) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) | DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCAL_QUEUE_ATOMICS(next_iteration_setup) | ||||
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | ||||
| kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | ||||
| Show All 17 Lines | |||||
Dont need this I think.