Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/kernel_passes.h
| Show All 10 Lines | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | * distributed under the License is distributed on an "AS IS" BASIS, | ||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
/* Defines __ATOMIC_PASS_WRITE__ whenever __SPLIT_KERNEL__ or __KERNEL_CUDA__ is
 * defined. The pass-write helpers that follow test this macro (in the right-hand
 * column of this view) to choose atomic_add_and_fetch_float() over a plain `+=`.
 * These three lines appear in one column only.
 * NOTE(review): presumably these are the configurations in which several threads
 * can accumulate into the same buffer element - confirm. */
| #if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__) | |||||
| #define __ATOMIC_PASS_WRITE__ | |||||
| #endif | |||||
/* Accumulate a single float into a pass buffer: adds `value` to `*buffer`.
 *
 * buffer: destination element; it is read and then written.
 * value:  amount to add.
 *
 * The two columns differ only in the preprocessor guard: the left-hand one tests
 * __SPLIT_KERNEL__, the right-hand one tests __ATOMIC_PASS_WRITE__. When the guard
 * is defined the add goes through atomic_add_and_fetch_float(); otherwise it is a
 * plain `+=`. */
| ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) | ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) | ||||
| { | { | ||||
| ccl_global float *buf = buffer; | ccl_global float *buf = buffer; | ||||
| #if defined(__SPLIT_KERNEL__) | #ifdef __ATOMIC_PASS_WRITE__ | ||||
| atomic_add_and_fetch_float(buf, value); | atomic_add_and_fetch_float(buf, value); | ||||
| #else | #else | ||||
| *buf += value; | *buf += value; | ||||
| #endif /* __SPLIT_KERNEL__ */ | #endif | ||||
| } | } | ||||
/* Accumulate a float3 into a pass buffer.
 *
 * buffer: first of the destination floats.
 * value:  vector to add.
 *
 * Guarded branch (left column: __SPLIT_KERNEL__, right column:
 * __ATOMIC_PASS_WRITE__): value.x/.y/.z are added to buffer[0], buffer[1] and
 * buffer[2] with three separate atomic_add_and_fetch_float() calls, so each
 * component is updated on its own rather than the vector as one unit.
 *
 * Fallback branch: `buffer` is reinterpreted as a float3 pointer and updated with
 * a single `+=`.
 * NOTE(review): the cast path reads and writes sizeof(float3) bytes through a
 * float3 pointer - confirm that the float3 layout and alignment match what the
 * caller provides at `buffer`. */
| ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value) | ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value) | ||||
| { | { | ||||
| #if defined(__SPLIT_KERNEL__) | #ifdef __ATOMIC_PASS_WRITE__ | ||||
| ccl_global float *buf_x = buffer + 0; | ccl_global float *buf_x = buffer + 0; | ||||
| ccl_global float *buf_y = buffer + 1; | ccl_global float *buf_y = buffer + 1; | ||||
| ccl_global float *buf_z = buffer + 2; | ccl_global float *buf_z = buffer + 2; | ||||
| atomic_add_and_fetch_float(buf_x, value.x); | atomic_add_and_fetch_float(buf_x, value.x); | ||||
| atomic_add_and_fetch_float(buf_y, value.y); | atomic_add_and_fetch_float(buf_y, value.y); | ||||
| atomic_add_and_fetch_float(buf_z, value.z); | atomic_add_and_fetch_float(buf_z, value.z); | ||||
| #else | #else | ||||
| ccl_global float3 *buf = (ccl_global float3*)buffer; | ccl_global float3 *buf = (ccl_global float3*)buffer; | ||||
| *buf += value; | *buf += value; | ||||
| #endif /* __SPLIT_KERNEL__ */ | #endif | ||||
| } | } | ||||
/* Accumulate a float4 into a pass buffer.
 *
 * buffer: first of the four destination floats.
 * value:  vector to add.
 *
 * Guarded branch (left column: __SPLIT_KERNEL__, right column:
 * __ATOMIC_PASS_WRITE__): value.x/.y/.z/.w are added to buffer[0..3] with four
 * separate atomic_add_and_fetch_float() calls, one per component.
 *
 * Fallback branch: `buffer` is reinterpreted as a float4 pointer and updated with
 * a single `+=`.
 * NOTE(review): the cast path assumes `buffer` is suitably aligned for float4 -
 * confirm against callers. */
| ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value) | ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value) | ||||
| { | { | ||||
| #if defined(__SPLIT_KERNEL__) | #ifdef __ATOMIC_PASS_WRITE__ | ||||
| ccl_global float *buf_x = buffer + 0; | ccl_global float *buf_x = buffer + 0; | ||||
| ccl_global float *buf_y = buffer + 1; | ccl_global float *buf_y = buffer + 1; | ||||
| ccl_global float *buf_z = buffer + 2; | ccl_global float *buf_z = buffer + 2; | ||||
| ccl_global float *buf_w = buffer + 3; | ccl_global float *buf_w = buffer + 3; | ||||
| atomic_add_and_fetch_float(buf_x, value.x); | atomic_add_and_fetch_float(buf_x, value.x); | ||||
| atomic_add_and_fetch_float(buf_y, value.y); | atomic_add_and_fetch_float(buf_y, value.y); | ||||
| atomic_add_and_fetch_float(buf_z, value.z); | atomic_add_and_fetch_float(buf_z, value.z); | ||||
| atomic_add_and_fetch_float(buf_w, value.w); | atomic_add_and_fetch_float(buf_w, value.w); | ||||
| #else | #else | ||||
| ccl_global float4 *buf = (ccl_global float4*)buffer; | ccl_global float4 *buf = (ccl_global float4*)buffer; | ||||
| *buf += value; | *buf += value; | ||||
| #endif /* __SPLIT_KERNEL__ */ | #endif | ||||
| } | } | ||||
| #ifdef __DENOISING_FEATURES__ | #ifdef __DENOISING_FEATURES__ | ||||
/* Accumulate a sample and its square into two consecutive buffer slots.
 *
 * buffer: buffer[0] receives `value`, buffer[1] receives `value*value`; both
 *         writes go through kernel_write_pass_float().
 * value:  sample to accumulate.
 *
 * Only the two running sums are stored here; no variance is computed in this
 * function.
 * NOTE(review): presumably a later step derives the variance from these sums
 * using the formula quoted in the comment inside the body - confirm against the
 * consumer of this pass. */
| ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value) | ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value) | ||||
| { | { | ||||
| kernel_write_pass_float(buffer, value); | kernel_write_pass_float(buffer, value); | ||||
| /* The online one-pass variance update that's used for the megakernel can't easily be implemented | /* The online one-pass variance update that's used for the megakernel can't easily be implemented | ||||
| * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ | * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ | ||||
| kernel_write_pass_float(buffer+1, value*value); | kernel_write_pass_float(buffer+1, value*value); | ||||
| } | } | ||||
| # if defined(__SPLIT_KERNEL__) | # ifdef __ATOMIC_PASS_WRITE__ | ||||
| # define kernel_write_pass_float3_unaligned kernel_write_pass_float3 | # define kernel_write_pass_float3_unaligned kernel_write_pass_float3 | ||||
| # else | # else | ||||
/* Accumulate a float3 into three consecutive floats, one component at a time.
 *
 * buffer: buffer[0], buffer[1] and buffer[2] are each updated with a plain `+=`.
 * value:  vector whose x/y/z components are added.
 *
 * This definition sits in the `# else` branch of the guard just above it; when
 * that guard is defined, the name is instead a macro alias for
 * kernel_write_pass_float3.
 * NOTE(review): unlike the fallback path of kernel_write_pass_float3, nothing
 * here casts `buffer` to a float3 pointer, which is presumably why it is safe
 * for destinations that are not float3-aligned - confirm. */
| ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value) | ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value) | ||||
| { | { | ||||
| buffer[0] += value.x; | buffer[0] += value.x; | ||||
| buffer[1] += value.y; | buffer[1] += value.y; | ||||
| buffer[2] += value.z; | buffer[2] += value.z; | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 291 Lines • Show Last 20 Lines | |||||