intern/cycles/kernel/kernels/cuda/kernel.cu
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | ||||
| kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) | kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | ||||
| if(x < sx + sw && y < sy + sh) | if(x < sx + sw && y < sy + sh) { | ||||
| kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride); | KernelGlobals kg; | ||||
| kernel_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride); | |||||
| } | |||||
| } | } | ||||
| #ifdef __BRANCHED_PATH__ | #ifdef __BRANCHED_PATH__ | ||||
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) | ||||
| kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) | kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | ||||
| if(x < sx + sw && y < sy + sh) | if(x < sx + sw && y < sy + sh) { | ||||
| kernel_branched_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride); | KernelGlobals kg; | ||||
| kernel_branched_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride); | |||||
| } | |||||
| } | } | ||||
| #endif | #endif | ||||
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | ||||
| kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | ||||
| if(x < sx + sw && y < sy + sh) | if(x < sx + sw && y < sy + sh) { | ||||
| kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); | kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); | ||||
| } | } | ||||
| } | |||||
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | ||||
| kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | int y = sy + blockDim.y*blockIdx.y + threadIdx.y; | ||||
| if(x < sx + sw && y < sy + sh) | if(x < sx + sw && y < sy + sh) { | ||||
| kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); | kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); | ||||
| } | } | ||||
| } | |||||
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | ||||
| kernel_cuda_shader(uint4 *input, | kernel_cuda_shader(uint4 *input, | ||||
| float4 *output, | float4 *output, | ||||
| float *output_luma, | float *output_luma, | ||||
| int type, | int type, | ||||
| int sx, | int sx, | ||||
| int sw, | int sw, | ||||
| int offset, | int offset, | ||||
| int sample) | int sample) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| if(x < sx + sw) { | if(x < sx + sw) { | ||||
| kernel_shader_evaluate(NULL, | KernelGlobals kg; | ||||
| kernel_shader_evaluate(&kg, | |||||
| input, | input, | ||||
| output, | output, | ||||
| output_luma, | output_luma, | ||||
| (ShaderEvalType)type, | (ShaderEvalType)type, | ||||
| x, | x, | ||||
| sample); | sample); | ||||
| } | } | ||||
| } | } | ||||
| #ifdef __BAKING__ | #ifdef __BAKING__ | ||||
| extern "C" __global__ void | extern "C" __global__ void | ||||
| CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | ||||
| kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample) | kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample) | ||||
| { | { | ||||
| int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | ||||
| if(x < sx + sw) | if(x < sx + sw) { | ||||
| kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, filter, x, offset, sample); | KernelGlobals kg; | ||||
| kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample); | |||||
| } | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
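
For context, each kernel above is an entry point launched by the host over a tile of the render or evaluation buffer; the in-kernel bounds check (x < sx + sw, y < sy + sh) lets the launch grid overshoot the tile edges safely. Below is a minimal host-side sketch, not taken from this changeset, of how a kernel with the kernel_cuda_path_trace signature could be launched through the CUDA driver API. The helper name launch_path_trace, the 16x16 block shape, and the assumption that the module is already loaded are illustrative choices, not values from Cycles; error handling is omitted.

/* Hypothetical host-side launch of kernel_cuda_path_trace via the CUDA
 * driver API. Helper name and block shape are assumptions for illustration,
 * not taken from the changeset. */
#include <cuda.h>

static void launch_path_trace(CUmodule module,
                              CUdeviceptr d_buffer, CUdeviceptr d_rng_state,
                              int sample, int sx, int sy, int sw, int sh,
                              int offset, int stride)
{
	CUfunction func;
	cuModuleGetFunction(&func, module, "kernel_cuda_path_trace");

	/* Kernel arguments are passed as an array of pointers to the values,
	 * in the same order as the kernel_cuda_path_trace parameters. */
	void *args[] = {&d_buffer, &d_rng_state, &sample,
	                &sx, &sy, &sw, &sh, &offset, &stride};

	/* One thread per pixel; round the grid up so the whole sw x sh tile is
	 * covered. Threads past the tile edge exit via the kernel's bounds check. */
	const unsigned int block = 16;  /* assumed 16x16 thread block */
	unsigned int grid_x = (sw + block - 1) / block;
	unsigned int grid_y = (sh + block - 1) / block;

	cuLaunchKernel(func,
	               grid_x, grid_y, 1,  /* grid size in blocks */
	               block, block, 1,    /* threads per block */
	               0,                  /* dynamic shared memory */
	               0,                  /* default stream */
	               args, NULL);
}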