Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/kernel_passes.h
| Show First 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | #if defined(__SPLIT_KERNEL__) | ||||
| atomic_add_and_fetch_float(buf_z, value.z); | atomic_add_and_fetch_float(buf_z, value.z); | ||||
| atomic_add_and_fetch_float(buf_w, value.w); | atomic_add_and_fetch_float(buf_w, value.w); | ||||
| #else | #else | ||||
| ccl_global float4 *buf = (ccl_global float4*)buffer; | ccl_global float4 *buf = (ccl_global float4*)buffer; | ||||
| *buf = (sample == 0)? value: *buf + value; | *buf = (sample == 0)? value: *buf + value; | ||||
| #endif /* __SPLIT_KERNEL__ */ | #endif /* __SPLIT_KERNEL__ */ | ||||
| } | } | ||||
| #ifdef __DENOISING_FEATURES__ | |||||
| ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, int sample, float value) | |||||
| { | |||||
| kernel_write_pass_float(buffer, sample, value); | |||||
| /* The online one-pass variance update that's used for the megakernel can't easily be implemented | |||||
| * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ | |||||
| # ifdef __SPLIT_KERNEL__ | |||||
| kernel_write_pass_float(buffer+1, sample, value*value); | |||||
| # else | |||||
| if(sample == 0) { | |||||
| kernel_write_pass_float(buffer+1, sample, 0.0f); | |||||
| } | |||||
| else { | |||||
| float new_mean = buffer[0] * (1.0f / (sample + 1)); | |||||
| float old_mean = (buffer[0] - value) * (1.0f / sample); | |||||
| kernel_write_pass_float(buffer+1, sample, (value - new_mean) * (value - old_mean)); | |||||
| } | |||||
| # endif | |||||
| } | |||||
| # if defined(__SPLIT_KERNEL__) | |||||
| # define kernel_write_pass_float3_unaligned kernel_write_pass_float3 | |||||
| # else | |||||
| ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, int sample, float3 value) | |||||
| { | |||||
| buffer[0] = (sample == 0)? value.x: buffer[0] + value.x; | |||||
| buffer[1] = (sample == 0)? value.y: buffer[1] + value.y; | |||||
| buffer[2] = (sample == 0)? value.z: buffer[2] + value.z; | |||||
| } | |||||
| # endif | |||||
| ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, int sample, float3 value) | |||||
| { | |||||
| kernel_write_pass_float3_unaligned(buffer, sample, value); | |||||
| # ifdef __SPLIT_KERNEL__ | |||||
| kernel_write_pass_float3_unaligned(buffer+3, sample, value*value); | |||||
| # else | |||||
| if(sample == 0) { | |||||
| kernel_write_pass_float3_unaligned(buffer+3, sample, make_float3(0.0f, 0.0f, 0.0f)); | |||||
| } | |||||
| else { | |||||
| float3 sum = make_float3(buffer[0], buffer[1], buffer[2]); | |||||
| float3 new_mean = sum * (1.0f / (sample + 1)); | |||||
| float3 old_mean = (sum - value) * (1.0f / sample); | |||||
| kernel_write_pass_float3_unaligned(buffer+3, sample, (value - new_mean) * (value - old_mean)); | |||||
| } | |||||
| # endif | |||||
| } | |||||
| ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer, | |||||
| int sample, float path_total, float path_total_shaded) | |||||
| { | |||||
| if(kernel_data.film.pass_denoising_data == 0) | |||||
| return; | |||||
| buffer += (sample & 1)? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A; | |||||
brecht: A comment about or getting rid of these magic numbers would be good. | |||||
| path_total = ensure_finite(path_total); | |||||
| path_total_shaded = ensure_finite(path_total_shaded); | |||||
| kernel_write_pass_float(buffer, sample/2, path_total); | |||||
| kernel_write_pass_float(buffer+1, sample/2, path_total_shaded); | |||||
| float value = path_total_shaded / max(path_total, 1e-7f); | |||||
| # ifdef __SPLIT_KERNEL__ | |||||
| kernel_write_pass_float(buffer+2, sample/2, value*value); | |||||
| # else | |||||
| if(sample < 2) { | |||||
| kernel_write_pass_float(buffer+2, sample/2, 0.0f); | |||||
| } | |||||
| else { | |||||
| float old_value = (buffer[1] - path_total_shaded) / max(buffer[0] - path_total, 1e-7f); | |||||
| float new_value = buffer[1] / max(buffer[0], 1e-7f); | |||||
| kernel_write_pass_float(buffer+2, sample, (value - new_value) * (value - old_value)); | |||||
| } | |||||
| # endif | |||||
| } | |||||
| #endif /* __DENOISING_FEATURES__ */ | |||||
| ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, | |||||
| ShaderData *sd, | |||||
| ccl_global PathState *state, | |||||
| PathRadiance *L) | |||||
| { | |||||
| #ifdef __DENOISING_FEATURES__ | |||||
| if(state->denoising_feature_weight == 0.0f) { | |||||
| return; | |||||
| } | |||||
| L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length); | |||||
| float3 normal = make_float3(0.0f, 0.0f, 0.0f); | |||||
| float3 albedo = make_float3(0.0f, 0.0f, 0.0f); | |||||
| float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; | |||||
| for(int i = 0; i < sd->num_closure; i++) { | |||||
| ShaderClosure *sc = &sd->closure[i]; | |||||
| if(!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) | |||||
| continue; | |||||
| /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ | |||||
| normal += sc->N * sc->sample_weight; | |||||
| sum_weight += sc->sample_weight; | |||||
| if(!bsdf_is_specular_like(sc)) { | |||||
| albedo += sc->weight; | |||||
| sum_nonspecular_weight += sc->sample_weight; | |||||
| } | |||||
| } | |||||
| /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ | |||||
Not Done Inline ActionsCan we make a bsdf_roughness() function in closures/bsdf.h for this? brecht: Can we make a `bsdf_roughness()` function in `closures/bsdf.h` for this? | |||||
| if((sum_weight == 0.0f) || (sum_nonspecular_weight*4.0f > sum_weight)) { | |||||
| if(sum_weight != 0.0f) { | |||||
| normal /= sum_weight; | |||||
Not Done Inline ActionsI guess this explains why N was moved out of the specific BSDFs. brecht: I guess this explains why `N` was moved out of the specific BSDFs. | |||||
| } | |||||
| L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal); | |||||
| L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo); | |||||
| state->denoising_feature_weight = 0.0f; | |||||
| if(!(state->flag & PATH_RAY_SHADOW_CATCHER)) { | |||||
| state->flag &= ~PATH_RAY_STORE_SHADOW_INFO; | |||||
| } | |||||
| } | |||||
| #else | |||||
| (void) kg; | |||||
| (void) sd; | |||||
| (void) state; | |||||
| (void) L; | |||||
| #endif /* __DENOISING_FEATURES__ */ | |||||
| } | |||||
| ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, | ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, | ||||
| ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput) | ShaderData *sd, int sample, ccl_addr_space PathState *state, float3 throughput) | ||||
| { | { | ||||
| #ifdef __PASSES__ | #ifdef __PASSES__ | ||||
| int path_flag = state->flag; | int path_flag = state->flag; | ||||
| if(!(path_flag & PATH_RAY_CAMERA)) | if(!(path_flag & PATH_RAY_CAMERA)) | ||||
| return; | return; | ||||
| ▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines | if(flag & PASS_SHADOW) { | ||||
| shadow.w = kernel_data.film.pass_shadow_scale; | shadow.w = kernel_data.film.pass_shadow_scale; | ||||
| kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, sample, shadow); | kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, sample, shadow); | ||||
| } | } | ||||
| if(flag & PASS_MIST) | if(flag & PASS_MIST) | ||||
| kernel_write_pass_float(buffer + kernel_data.film.pass_mist, sample, 1.0f - L->mist); | kernel_write_pass_float(buffer + kernel_data.film.pass_mist, sample, 1.0f - L->mist); | ||||
| #endif | #endif | ||||
| } | } | ||||
| ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *buffer, | |||||
| int sample, PathRadiance *L, float alpha, bool is_shadow_catcher) | |||||
| { | |||||
| if(L) { | |||||
| float3 L_sum; | |||||
| #ifdef __SHADOW_TRICKS__ | |||||
| if(is_shadow_catcher) { | |||||
| L_sum = path_radiance_sum_shadowcatcher(kg, L, &alpha); | |||||
| } | |||||
| else | |||||
| #endif /* __SHADOW_TRICKS__ */ | |||||
| { | |||||
| L_sum = path_radiance_clamp_and_sum(kg, L); | |||||
| } | |||||
| kernel_write_pass_float4(buffer, sample, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha)); | |||||
| kernel_write_light_passes(kg, buffer, L, sample); | |||||
| #ifdef __DENOISING_FEATURES__ | |||||
| if(kernel_data.film.pass_denoising_data) { | |||||
| # ifdef __SHADOW_TRICKS__ | |||||
| kernel_write_denoising_shadow(kg, buffer, sample, average(L->path_total), average(L->path_total_shaded)); | |||||
| # else | |||||
| kernel_write_denoising_shadow(kg, buffer, sample, 0.0f, 0.0f); | |||||
| # endif | |||||
| if(kernel_data.film.pass_denoising_clean) { | |||||
| float3 noisy, clean; | |||||
| path_radiance_split_denoising(kg, L, &noisy, &clean); | |||||
| kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, | |||||
| sample, noisy); | |||||
| kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, | |||||
| sample, clean); | |||||
| } | |||||
| else { | |||||
| kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, | |||||
| sample, L_sum); | |||||
| } | |||||
| kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, | |||||
| sample, L->denoising_normal); | |||||
| kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, | |||||
| sample, L->denoising_albedo); | |||||
| kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, | |||||
| sample, L->denoising_depth); | |||||
| } | |||||
| #endif /* __DENOISING_FEATURES__ */ | |||||
| } | |||||
| else { | |||||
| kernel_write_pass_float4(buffer, sample, make_float4(0.0f, 0.0f, 0.0f, 0.0f)); | |||||
| #ifdef __DENOISING_FEATURES__ | |||||
| if(kernel_data.film.pass_denoising_data) { | |||||
| kernel_write_denoising_shadow(kg, buffer, sample, 0.0f, 0.0f); | |||||
| kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, | |||||
| sample, make_float3(0.0f, 0.0f, 0.0f)); | |||||
| kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, | |||||
| sample, make_float3(0.0f, 0.0f, 0.0f)); | |||||
| kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, | |||||
| sample, make_float3(0.0f, 0.0f, 0.0f)); | |||||
| kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, | |||||
| sample, 0.0f); | |||||
| if(kernel_data.film.pass_denoising_clean) { | |||||
| kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, | |||||
| sample, make_float3(0.0f, 0.0f, 0.0f)); | |||||
| } | |||||
| } | |||||
| #endif /* __DENOISING_FEATURES__ */ | |||||
| } | |||||
| } | |||||
| CCL_NAMESPACE_END | CCL_NAMESPACE_END | ||||
A comment explaining these magic numbers, or getting rid of them, would be good.