Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/kernels/cuda/kernel_split.cu
| Show First 20 Lines • Show All 87 Lines • ▼ Show 20 Lines | |||||
| #define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ | #define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ | ||||
| extern "C" __global__ void \ | extern "C" __global__ void \ | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | ||||
| kernel_cuda_##name() \ | kernel_cuda_##name() \ | ||||
| { \ | { \ | ||||
| kernel_##name(NULL); \ | kernel_##name(NULL); \ | ||||
| } | } | ||||
| #define DEFINE_SPLIT_KERNEL_FUNCTION_ATOMICS(name, num_atomics) \ | |||||
maiself: Don't need this, I think. | |||||
| extern "C" __global__ void \ | |||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | |||||
| kernel_cuda_##name() \ | |||||
| { \ | |||||
| ccl_local unsigned int atomics[num_atomics]; \ | |||||
| kernel_##name(NULL, atomics); \ | |||||
| } | |||||
| #define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ | |||||
| extern "C" __global__ void \ | |||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) \ | |||||
| kernel_cuda_##name() \ | |||||
| { \ | |||||
| ccl_local type locals; \ | |||||
| kernel_##name(NULL, &locals); \ | |||||
| } | |||||
| DEFINE_SPLIT_KERNEL_FUNCTION(path_init) | DEFINE_SPLIT_KERNEL_FUNCTION(path_init) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) | DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) | DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) | DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) | DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) | DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) | DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) | DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) | ||||
| DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update) | DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) | ||||
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | ||||
| kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | ||||
| Show All 17 Lines | |||||
Don't need this, I think.