Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/device/gpu/kernel.h
| Show First 20 Lines • Show All 406 Lines • ▼ Show 20 Lines | |||||
| */ | */ | ||||
| /* Common implementation for float destination. */ | /* Common implementation for float destination. */ | ||||
| template<typename Processor> | template<typename Processor> | ||||
| ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *kfilm_convert, | ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *kfilm_convert, | ||||
| float *pixels, | float *pixels, | ||||
| float *render_buffer, | float *render_buffer, | ||||
| int num_pixels, | int num_pixels, | ||||
| int width, | |||||
| int offset, | int offset, | ||||
| int stride, | int stride, | ||||
| int dst_offset, | int dst_offset, | ||||
| int dst_stride, | |||||
| const Processor &processor) | const Processor &processor) | ||||
| { | { | ||||
| const int render_pixel_index = ccl_gpu_global_id_x(); | const int render_pixel_index = ccl_gpu_global_id_x(); | ||||
| if (render_pixel_index >= num_pixels) { | if (render_pixel_index >= num_pixels) { | ||||
| return; | return; | ||||
| } | } | ||||
| const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride; | const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride; | ||||
| ccl_global const float *buffer = render_buffer + render_buffer_offset; | ccl_global const float *buffer = render_buffer + render_buffer_offset; | ||||
| ccl_global float *pixel = pixels + | ccl_global float *pixel = pixels + | ||||
| (render_pixel_index + dst_offset) * kfilm_convert->pixel_stride; | (render_pixel_index + dst_offset) * kfilm_convert->pixel_stride; | ||||
| processor(kfilm_convert, buffer, pixel); | processor(kfilm_convert, buffer, pixel); | ||||
| } | } | ||||
| /* Common implementation for half4 destination and 4-channel input pass. */ | /* Common implementation for half4 destination and 4-channel input pass. */ | ||||
| template<typename Processor> | template<typename Processor> | ||||
| ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba( | ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba( | ||||
| const KernelFilmConvert *kfilm_convert, | const KernelFilmConvert *kfilm_convert, | ||||
| uchar4 *rgba, | uchar4 *rgba, | ||||
| float *render_buffer, | float *render_buffer, | ||||
| int num_pixels, | int num_pixels, | ||||
| int width, | |||||
| int offset, | int offset, | ||||
| int stride, | int stride, | ||||
| int rgba_offset, | int rgba_offset, | ||||
| int rgba_stride, | |||||
| const Processor &processor) | const Processor &processor) | ||||
| { | { | ||||
| const int render_pixel_index = ccl_gpu_global_id_x(); | const int render_pixel_index = ccl_gpu_global_id_x(); | ||||
| if (render_pixel_index >= num_pixels) { | if (render_pixel_index >= num_pixels) { | ||||
| return; | return; | ||||
| } | } | ||||
| const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride; | const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride; | ||||
| ccl_global const float *buffer = render_buffer + render_buffer_offset; | ccl_global const float *buffer = render_buffer + render_buffer_offset; | ||||
| float pixel[4]; | float pixel[4]; | ||||
| processor(kfilm_convert, buffer, pixel); | processor(kfilm_convert, buffer, pixel); | ||||
| film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel); | film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel); | ||||
| ccl_global half *out = (ccl_global half *)rgba + (rgba_offset + render_pixel_index) * 4; | const int x = render_pixel_index % width; | ||||
| float4_store_half(out, make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); | const int y = render_pixel_index / width; | ||||
| ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x; | |||||
| float4_store_half((ccl_global half *)out, make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); | |||||
| } | } | ||||
| /* Common implementation for half4 destination and 3-channel input pass. */ | /* Common implementation for half4 destination and 3-channel input pass. */ | ||||
| template<typename Processor> | template<typename Processor> | ||||
| ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgb( | ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgb( | ||||
| const KernelFilmConvert *kfilm_convert, | const KernelFilmConvert *kfilm_convert, | ||||
| uchar4 *rgba, | uchar4 *rgba, | ||||
| float *render_buffer, | float *render_buffer, | ||||
| int num_pixels, | int num_pixels, | ||||
| int width, | |||||
| int offset, | int offset, | ||||
| int stride, | int stride, | ||||
| int rgba_offset, | int rgba_offset, | ||||
| int rgba_stride, | |||||
| const Processor &processor) | const Processor &processor) | ||||
| { | { | ||||
| kernel_gpu_film_convert_half_rgba_common_rgba( | kernel_gpu_film_convert_half_rgba_common_rgba( | ||||
| kfilm_convert, | kfilm_convert, | ||||
| rgba, | rgba, | ||||
| render_buffer, | render_buffer, | ||||
| num_pixels, | num_pixels, | ||||
| width, | |||||
| offset, | offset, | ||||
| stride, | stride, | ||||
| rgba_offset, | rgba_offset, | ||||
| rgba_stride, | |||||
| [&processor](const KernelFilmConvert *kfilm_convert, | [&processor](const KernelFilmConvert *kfilm_convert, | ||||
| ccl_global const float *buffer, | ccl_global const float *buffer, | ||||
| float *pixel_rgba) { | float *pixel_rgba) { | ||||
| processor(kfilm_convert, buffer, pixel_rgba); | processor(kfilm_convert, buffer, pixel_rgba); | ||||
| pixel_rgba[3] = 1.0f; | pixel_rgba[3] = 1.0f; | ||||
| }); | }); | ||||
| } | } | ||||
| /* Common implementation for half4 destination and single channel input pass. */ | /* Common implementation for half4 destination and single channel input pass. */ | ||||
| template<typename Processor> | template<typename Processor> | ||||
| ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_value( | ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_value( | ||||
| const KernelFilmConvert *kfilm_convert, | const KernelFilmConvert *kfilm_convert, | ||||
| uchar4 *rgba, | uchar4 *rgba, | ||||
| float *render_buffer, | float *render_buffer, | ||||
| int num_pixels, | int num_pixels, | ||||
| int width, | |||||
| int offset, | int offset, | ||||
| int stride, | int stride, | ||||
| int rgba_offset, | int rgba_offset, | ||||
| int rgba_stride, | |||||
| const Processor &processor) | const Processor &processor) | ||||
| { | { | ||||
| kernel_gpu_film_convert_half_rgba_common_rgba( | kernel_gpu_film_convert_half_rgba_common_rgba( | ||||
| kfilm_convert, | kfilm_convert, | ||||
| rgba, | rgba, | ||||
| render_buffer, | render_buffer, | ||||
| num_pixels, | num_pixels, | ||||
| width, | |||||
| offset, | offset, | ||||
| stride, | stride, | ||||
| rgba_offset, | rgba_offset, | ||||
| rgba_stride, | |||||
| [&processor](const KernelFilmConvert *kfilm_convert, | [&processor](const KernelFilmConvert *kfilm_convert, | ||||
| ccl_global const float *buffer, | ccl_global const float *buffer, | ||||
| float *pixel_rgba) { | float *pixel_rgba) { | ||||
| float value; | float value; | ||||
| processor(kfilm_convert, buffer, &value); | processor(kfilm_convert, buffer, &value); | ||||
| pixel_rgba[0] = value; | pixel_rgba[0] = value; | ||||
| pixel_rgba[1] = value; | pixel_rgba[1] = value; | ||||
| pixel_rgba[2] = value; | pixel_rgba[2] = value; | ||||
| pixel_rgba[3] = 1.0f; | pixel_rgba[3] = 1.0f; | ||||
| }); | }); | ||||
| } | } | ||||
/* Prefix for a film-convert kernel definition: wraps `name` with `ccl_gpu_kernel()` using the
 * block-size and register-limit constants. The parameter list and body follow the expansion. */
#define KERNEL_FILM_CONVERT_PROC(name) \
  ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) name
/* Defines the two kernels of one film-convert variant:
 *
 * - `kernel_gpu_film_convert_<variant>`: float destination, implemented by
 *   `kernel_gpu_film_convert_common`.
 * - `kernel_gpu_film_convert_<variant>_half_rgba`: half4 destination, implemented by
 *   `kernel_gpu_film_convert_half_rgba_common_<channels>`.
 *
 * Both use `film_get_pass_pixel_<variant>` as the per-pixel processor and take the same
 * trailing arguments (pixel count, width, source offset/stride, destination offset/stride). */
#define KERNEL_FILM_CONVERT_DEFINE(variant, channels) \
  KERNEL_FILM_CONVERT_PROC(kernel_gpu_film_convert_##variant) \
  (const KernelFilmConvert kfilm_convert, \
   float *pixels, \
   float *render_buffer, \
   int num_pixels, \
   int width, \
   int offset, \
   int stride, \
   int rgba_offset, \
   int rgba_stride) \
  { \
    kernel_gpu_film_convert_common( \
        &kfilm_convert, pixels, render_buffer, \
        num_pixels, width, offset, stride, \
        rgba_offset, rgba_stride, \
        film_get_pass_pixel_##variant); \
  } \
  KERNEL_FILM_CONVERT_PROC(kernel_gpu_film_convert_##variant##_half_rgba) \
  (const KernelFilmConvert kfilm_convert, \
   uchar4 *rgba, \
   float *render_buffer, \
   int num_pixels, \
   int width, \
   int offset, \
   int stride, \
   int rgba_offset, \
   int rgba_stride) \
  { \
    kernel_gpu_film_convert_half_rgba_common_##channels( \
        &kfilm_convert, rgba, render_buffer, \
        num_pixels, width, offset, stride, \
        rgba_offset, rgba_stride, \
        film_get_pass_pixel_##variant); \
  }
/* Kernel instantiations. The second argument `value` selects
 * `kernel_gpu_film_convert_half_rgba_common_value` for the half-RGBA kernel of each variant. */
KERNEL_FILM_CONVERT_DEFINE(depth, value)
KERNEL_FILM_CONVERT_DEFINE(mist, value)
KERNEL_FILM_CONVERT_DEFINE(sample_count, value)
KERNEL_FILM_CONVERT_DEFINE(float, value)
| ▲ Show 20 Lines • Show All 252 Lines • Show Last 20 Lines | |||||