Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/device/gpu/kernel.h
| Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | |||||
| #include "kernel/film/read.h" | #include "kernel/film/read.h" | ||||
| /* -------------------------------------------------------------------- | /* -------------------------------------------------------------------- | ||||
| * Integrator. | * Integrator. | ||||
| */ | */ | ||||
| ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ||||
| ccl_gpu_kernel_signature(integrator_reset, | ccl_gpu_kernel_signature(integrator_reset, int num_states) | ||||
| int num_states) | |||||
| { | { | ||||
| const int state = ccl_gpu_global_id_x(); | const int state = ccl_gpu_global_id_x(); | ||||
| if (state < num_states) { | if (state < num_states) { | ||||
| INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; | INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; | ||||
| INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; | INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; | ||||
| } | } | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 191 Lines • ▼ Show 20 Lines | ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ||||
| const int global_index = ccl_gpu_global_id_x(); | const int global_index = ccl_gpu_global_id_x(); | ||||
| if (global_index < work_size) { | if (global_index < work_size) { | ||||
| const int state = (path_index_array) ? path_index_array[global_index] : global_index; | const int state = (path_index_array) ? path_index_array[global_index] : global_index; | ||||
| ccl_gpu_kernel_call(integrator_shade_volume(NULL, state, render_buffer)); | ccl_gpu_kernel_call(integrator_shade_volume(NULL, state, render_buffer)); | ||||
| } | } | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_queued_paths_array, | ccl_gpu_kernel_signature(integrator_queued_paths_array, | ||||
| int num_states, | int num_states, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices, | ccl_global int *num_indices, | ||||
| int kernel_index) | int kernel_index) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == kernel_index, | ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == kernel_index, | ||||
| int kernel_index) | int kernel_index); | ||||
| .kernel_index = kernel_index; | ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index; | ||||
| gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | ||||
| num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_queued_shadow_paths_array, | ccl_gpu_kernel_signature(integrator_queued_shadow_paths_array, | ||||
| int num_states, | int num_states, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices, | ccl_global int *num_indices, | ||||
| int kernel_index) | int kernel_index) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == kernel_index, | ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == kernel_index, | ||||
| int kernel_index) | int kernel_index); | ||||
| .kernel_index = kernel_index; | ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index; | ||||
| gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | ||||
| num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_active_paths_array, | ccl_gpu_kernel_signature(integrator_active_paths_array, | ||||
| int num_states, | int num_states, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices) | ccl_global int *num_indices) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0); | ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) != 0); | ||||
| gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | ||||
| num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_terminated_paths_array, | ccl_gpu_kernel_signature(integrator_terminated_paths_array, | ||||
| int num_states, | int num_states, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices, | ccl_global int *num_indices, | ||||
| int indices_offset) | int indices_offset) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0); | ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == 0); | ||||
| gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | ||||
| num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); | num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_terminated_shadow_paths_array, | ccl_gpu_kernel_signature(integrator_terminated_shadow_paths_array, | ||||
| int num_states, | int num_states, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices, | ccl_global int *num_indices, | ||||
| int indices_offset) | int indices_offset) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0); | ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0); | ||||
| gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | ||||
| num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); | num_states, indices + indices_offset, num_indices, ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_sorted_paths_array, | ccl_gpu_kernel_signature(integrator_sorted_paths_array, | ||||
| int num_states, | int num_states, | ||||
| int num_states_limit, | int num_states_limit, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices, | ccl_global int *num_indices, | ||||
| ccl_global int *key_counter, | ccl_global int *key_counter, | ||||
| ccl_global int *key_prefix_sum, | ccl_global int *key_prefix_sum, | ||||
| int kernel_index) | int kernel_index) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda((INTEGRATOR_STATE(state, path, queued_kernel) == kernel_index) ? | ccl_gpu_kernel_lambda((INTEGRATOR_STATE(state, path, queued_kernel) == kernel_index) ? | ||||
| INTEGRATOR_STATE(state, path, shader_sort_key) : | INTEGRATOR_STATE(state, path, shader_sort_key) : | ||||
| GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY, | GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY, | ||||
| int kernel_index) | int kernel_index); | ||||
| .kernel_index = kernel_index; | ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index; | ||||
| const uint state_index = ccl_gpu_global_id_x(); | const uint state_index = ccl_gpu_global_id_x(); | ||||
| gpu_parallel_sorted_index_array( | gpu_parallel_sorted_index_array(state_index, | ||||
| state_index, | |||||
| num_states, | num_states, | ||||
| num_states_limit, | num_states_limit, | ||||
| indices, | indices, | ||||
| num_indices, | num_indices, | ||||
| key_counter, | key_counter, | ||||
| key_prefix_sum, | key_prefix_sum, | ||||
| ccl_gpu_kernel_lambda_pass); | ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_compact_paths_array, | ccl_gpu_kernel_signature(integrator_compact_paths_array, | ||||
| int num_states, | int num_states, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices, | ccl_global int *num_indices, | ||||
| int num_active_paths) | int num_active_paths) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda((state >= num_active_paths) && (INTEGRATOR_STATE(state, path, queued_kernel) != 0), | ccl_gpu_kernel_lambda((state >= num_active_paths) && | ||||
| int num_active_paths) | (INTEGRATOR_STATE(state, path, queued_kernel) != 0), | ||||
| .num_active_paths = num_active_paths; | int num_active_paths); | ||||
| ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths; | |||||
| gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | ||||
| num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_compact_states, | ccl_gpu_kernel_signature(integrator_compact_states, | ||||
| ccl_global const int *active_terminated_states, | ccl_global const int *active_terminated_states, | ||||
| const int active_states_offset, | const int active_states_offset, | ||||
| const int terminated_states_offset, | const int terminated_states_offset, | ||||
| const int work_size) | const int work_size) | ||||
| { | { | ||||
| const int global_index = ccl_gpu_global_id_x(); | const int global_index = ccl_gpu_global_id_x(); | ||||
| if (global_index < work_size) { | if (global_index < work_size) { | ||||
| const int from_state = active_terminated_states[active_states_offset + global_index]; | const int from_state = active_terminated_states[active_states_offset + global_index]; | ||||
| const int to_state = active_terminated_states[terminated_states_offset + global_index]; | const int to_state = active_terminated_states[terminated_states_offset + global_index]; | ||||
| ccl_gpu_kernel_call(integrator_state_move(NULL, to_state, from_state)); | ccl_gpu_kernel_call(integrator_state_move(NULL, to_state, from_state)); | ||||
| } | } | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_compact_shadow_paths_array, | ccl_gpu_kernel_signature(integrator_compact_shadow_paths_array, | ||||
| int num_states, | int num_states, | ||||
| ccl_global int *indices, | ccl_global int *indices, | ||||
| ccl_global int *num_indices, | ccl_global int *num_indices, | ||||
| int num_active_paths) | int num_active_paths) | ||||
| { | { | ||||
| ccl_gpu_kernel_lambda((state >= num_active_paths) && (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0), | ccl_gpu_kernel_lambda((state >= num_active_paths) && | ||||
| int num_active_paths) | (INTEGRATOR_STATE(state, shadow_path, queued_kernel) != 0), | ||||
| .num_active_paths = num_active_paths; | int num_active_paths); | ||||
| ccl_gpu_kernel_lambda_pass.num_active_paths = num_active_paths; | |||||
| gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | gpu_parallel_active_index_array<GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE>( | ||||
| num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | num_states, indices, num_indices, ccl_gpu_kernel_lambda_pass); | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) | ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE) | ||||
| ccl_gpu_kernel_signature(integrator_compact_shadow_states, | ccl_gpu_kernel_signature(integrator_compact_shadow_states, | ||||
| ccl_global const int *active_terminated_states, | ccl_global const int *active_terminated_states, | ||||
| const int active_states_offset, | const int active_states_offset, | ||||
| const int terminated_states_offset, | const int terminated_states_offset, | ||||
| const int work_size) | const int work_size) | ||||
| { | { | ||||
| const int global_index = ccl_gpu_global_id_x(); | const int global_index = ccl_gpu_global_id_x(); | ||||
| if (global_index < work_size) { | if (global_index < work_size) { | ||||
| const int from_state = active_terminated_states[active_states_offset + global_index]; | const int from_state = active_terminated_states[active_states_offset + global_index]; | ||||
| const int to_state = active_terminated_states[terminated_states_offset + global_index]; | const int to_state = active_terminated_states[terminated_states_offset + global_index]; | ||||
| ccl_gpu_kernel_call(integrator_shadow_state_move(NULL, to_state, from_state)); | ccl_gpu_kernel_call(integrator_shadow_state_move(NULL, to_state, from_state)); | ||||
| } | } | ||||
| } | } | ||||
| ccl_gpu_kernel(GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature( | ccl_gpu_kernel_threads(GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature( | ||||
| prefix_sum, ccl_global int *counter, ccl_global int *prefix_sum, int num_values) | prefix_sum, ccl_global int *counter, ccl_global int *prefix_sum, int num_values) | ||||
| { | { | ||||
| gpu_parallel_prefix_sum(ccl_gpu_global_id_x(), counter, prefix_sum, num_values); | gpu_parallel_prefix_sum(ccl_gpu_global_id_x(), counter, prefix_sum, num_values); | ||||
| } | } | ||||
| /* -------------------------------------------------------------------- | /* -------------------------------------------------------------------- | ||||
| * Adaptive sampling. | * Adaptive sampling. | ||||
| */ | */ | ||||
| ▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines | |||||
| /* -------------------------------------------------------------------- | /* -------------------------------------------------------------------- | ||||
| * Film. | * Film. | ||||
| */ | */ | ||||
| #define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \ | #define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \ | ||||
| ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ | ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ | ||||
| ccl_gpu_kernel_signature(film_convert_##variant, \ | ccl_gpu_kernel_signature(film_convert_##variant, \ | ||||
| const KernelFilmConvert kfilm_convert, \ | const KernelFilmConvert kfilm_convert, \ | ||||
| ccl_global uchar4 *rgba, \ | ccl_global float *pixels, \ | ||||
| ccl_global float *render_buffer, \ | ccl_global float *render_buffer, \ | ||||
| int num_pixels, \ | int num_pixels, \ | ||||
| int width, \ | int width, \ | ||||
| int offset, \ | int offset, \ | ||||
| int stride, \ | int stride, \ | ||||
| int rgba_offset, \ | int rgba_offset, \ | ||||
| int rgba_stride) \ | int rgba_stride) \ | ||||
| { \ | { \ | ||||
| const int render_pixel_index = ccl_gpu_global_id_x(); \ | const int render_pixel_index = ccl_gpu_global_id_x(); \ | ||||
| if (render_pixel_index >= num_pixels) { \ | if (render_pixel_index >= num_pixels) { \ | ||||
| return; \ | return; \ | ||||
| } \ | } \ | ||||
| \ | \ | ||||
| const int x = render_pixel_index % width; \ | const int x = render_pixel_index % width; \ | ||||
| const int y = render_pixel_index / width; \ | const int y = render_pixel_index / width; \ | ||||
| \ | \ | ||||
| ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \ | ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \ | ||||
| y * stride * kfilm_convert.pass_stride; \ | y * stride * kfilm_convert.pass_stride; \ | ||||
| \ | \ | ||||
| float pixel[4]; \ | ccl_global float *pixel = pixels + \ | ||||
| film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \ | (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \ | ||||
| \ | |||||
| film_apply_pass_pixel_overlays_rgba(&kfilm_convert, buffer, pixel); \ | |||||
| \ | |||||
| if (input_channel_count == 1) { \ | |||||
| pixel[1] = pixel[2] = pixel[0]; \ | |||||
| } \ | |||||
| if (input_channel_count <= 3) { \ | |||||
| pixel[3] = 1.0f; \ | |||||
| } \ | |||||
| \ | \ | ||||
| ccl_global float *out = ((ccl_global float *)rgba) + rgba_offset + y * rgba_stride + x; \ | film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \ | ||||
| *(ccl_global float4 *)out = make_float4(pixel[0], pixel[1], pixel[2], pixel[3]); \ | |||||
| } \ | } \ | ||||
| \ | \ | ||||
| ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ | ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ | ||||
| ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \ | ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \ | ||||
| const KernelFilmConvert kfilm_convert, \ | const KernelFilmConvert kfilm_convert, \ | ||||
| ccl_global uchar4 *rgba, \ | ccl_global uchar4 *rgba, \ | ||||
| ccl_global float *render_buffer, \ | ccl_global float *render_buffer, \ | ||||
| int num_pixels, \ | int num_pixels, \ | ||||
| Show All 12 Lines | \ | ||||
| const int y = render_pixel_index / width; \ | const int y = render_pixel_index / width; \ | ||||
| \ | \ | ||||
| ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \ | ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \ | ||||
| y * stride * kfilm_convert.pass_stride; \ | y * stride * kfilm_convert.pass_stride; \ | ||||
| \ | \ | ||||
| float pixel[4]; \ | float pixel[4]; \ | ||||
| film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \ | film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \ | ||||
| \ | \ | ||||
| film_apply_pass_pixel_overlays_rgba(&kfilm_convert, buffer, pixel); \ | |||||
| \ | |||||
| if (input_channel_count == 1) { \ | if (input_channel_count == 1) { \ | ||||
| pixel[1] = pixel[2] = pixel[0]; \ | pixel[1] = pixel[2] = pixel[0]; \ | ||||
| } \ | } \ | ||||
| if (input_channel_count <= 3) { \ | if (input_channel_count <= 3) { \ | ||||
| pixel[3] = 1.0f; \ | pixel[3] = 1.0f; \ | ||||
| } \ | } \ | ||||
| \ | \ | ||||
| ccl_global half4 *out = ((ccl_global half4 *)rgba) + (rgba_offset + y * rgba_stride + x); \ | film_apply_pass_pixel_overlays_rgba(&kfilm_convert, buffer, pixel); \ | ||||
| \ | |||||
| ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x; \ | |||||
| *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \ | *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \ | ||||
| } | } | ||||
| /* 1 channel inputs */ | /* 1 channel inputs */ | ||||
| KERNEL_FILM_CONVERT_VARIANT(depth, 1) | KERNEL_FILM_CONVERT_VARIANT(depth, 1) | ||||
| KERNEL_FILM_CONVERT_VARIANT(mist, 1) | KERNEL_FILM_CONVERT_VARIANT(mist, 1) | ||||
| KERNEL_FILM_CONVERT_VARIANT(sample_count, 1) | KERNEL_FILM_CONVERT_VARIANT(sample_count, 1) | ||||
| KERNEL_FILM_CONVERT_VARIANT(float, 1) | KERNEL_FILM_CONVERT_VARIANT(float, 1) | ||||
| /* 3 channel inputs */ | /* 3 channel inputs */ | ||||
| KERNEL_FILM_CONVERT_VARIANT(light_path, 3) | KERNEL_FILM_CONVERT_VARIANT(light_path, 3) | ||||
| KERNEL_FILM_CONVERT_VARIANT(float3, 3) | KERNEL_FILM_CONVERT_VARIANT(float3, 3) | ||||
| /* 4 channel inputs */ | /* 4 channel inputs */ | ||||
| KERNEL_FILM_CONVERT_VARIANT(motion, 4) | KERNEL_FILM_CONVERT_VARIANT(motion, 4) | ||||
| KERNEL_FILM_CONVERT_VARIANT(cryptomatte, 4) | KERNEL_FILM_CONVERT_VARIANT(cryptomatte, 4) | ||||
| KERNEL_FILM_CONVERT_VARIANT(shadow_catcher, 4) | KERNEL_FILM_CONVERT_VARIANT(shadow_catcher, 4) | ||||
| KERNEL_FILM_CONVERT_VARIANT(shadow_catcher_matte_with_shadow, 4) | KERNEL_FILM_CONVERT_VARIANT(shadow_catcher_matte_with_shadow, 4) | ||||
| KERNEL_FILM_CONVERT_VARIANT(combined, 4) | KERNEL_FILM_CONVERT_VARIANT(combined, 4) | ||||
| KERNEL_FILM_CONVERT_VARIANT(float4, 4) | KERNEL_FILM_CONVERT_VARIANT(float4, 4) | ||||
| #undef KERNEL_FILM_CONVERT_VARIANT | |||||
| /* -------------------------------------------------------------------- | /* -------------------------------------------------------------------- | ||||
| * Shader evaluation. | * Shader evaluation. | ||||
| */ | */ | ||||
| /* Displacement */ | /* Displacement */ | ||||
| ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) | ||||
| ccl_gpu_kernel_signature(shader_eval_displace, | ccl_gpu_kernel_signature(shader_eval_displace, | ||||
| ▲ Show 20 Lines • Show All 253 Lines • Show Last 20 Lines | |||||