Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/util/math.h
| Show All 24 Lines | |||||
| #ifndef __KERNEL_GPU__ | #ifndef __KERNEL_GPU__ | ||||
| # include <cmath> | # include <cmath> | ||||
| #endif | #endif | ||||
| #ifdef __HIP__ | #ifdef __HIP__ | ||||
| # include <hip/hip_vector_types.h> | # include <hip/hip_vector_types.h> | ||||
| #endif | #endif | ||||
| #if !defined(__KERNEL_METAL__) | |||||
| #include <float.h> | # include <float.h> | ||||
| #include <math.h> | # include <math.h> | ||||
| #include <stdio.h> | # include <stdio.h> | ||||
| #endif /* !defined(__KERNEL_METAL__) */ | |||||
| #include "util/types.h" | #include "util/types.h" | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
| /* Float Pi variations */ | /* Float Pi variations */ | ||||
| /* Division */ | /* Division */ | ||||
| ▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines | ccl_device_inline float min4(float a, float b, float c, float d) | ||||
| return min(min(a, b), min(c, d)); | return min(min(a, b), min(c, d)); | ||||
| } | } | ||||
| ccl_device_inline float max4(float a, float b, float c, float d) | ccl_device_inline float max4(float a, float b, float c, float d) | ||||
| { | { | ||||
| return max(max(a, b), max(c, d)); | return max(max(a, b), max(c, d)); | ||||
| } | } | ||||
| #if !defined(__KERNEL_METAL__) | |||||
| /* Int/Float conversion */ | /* Int/Float conversion */ | ||||
| ccl_device_inline int as_int(uint i) | ccl_device_inline int as_int(uint i) | ||||
| { | { | ||||
| union { | union { | ||||
| uint ui; | uint ui; | ||||
| int i; | int i; | ||||
| } u; | } u; | ||||
| Show All 16 Lines | ccl_device_inline uint as_uint(float f) | ||||
| union { | union { | ||||
| uint i; | uint i; | ||||
| float f; | float f; | ||||
| } u; | } u; | ||||
| u.f = f; | u.f = f; | ||||
| return u.i; | return u.i; | ||||
| } | } | ||||
| #ifndef __HIP__ | # ifndef __HIP__ | ||||
| ccl_device_inline int __float_as_int(float f) | ccl_device_inline int __float_as_int(float f) | ||||
| { | { | ||||
| union { | union { | ||||
| int i; | int i; | ||||
| float f; | float f; | ||||
| } u; | } u; | ||||
| u.f = f; | u.f = f; | ||||
| return u.i; | return u.i; | ||||
| Show All 23 Lines | |||||
| { | { | ||||
| union { | union { | ||||
| uint i; | uint i; | ||||
| float f; | float f; | ||||
| } u; | } u; | ||||
| u.i = i; | u.i = i; | ||||
| return u.f; | return u.f; | ||||
| } | } | ||||
| #endif | # endif | ||||
| ccl_device_inline int4 __float4_as_int4(float4 f) | ccl_device_inline int4 __float4_as_int4(float4 f) | ||||
| { | { | ||||
| #ifdef __KERNEL_SSE__ | # ifdef __KERNEL_SSE__ | ||||
| return int4(_mm_castps_si128(f.m128)); | return int4(_mm_castps_si128(f.m128)); | ||||
| #else | # else | ||||
| return make_int4( | return make_int4( | ||||
| __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); | __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); | ||||
| #endif | # endif | ||||
| } | } | ||||
| ccl_device_inline float4 __int4_as_float4(int4 i) | ccl_device_inline float4 __int4_as_float4(int4 i) | ||||
| { | { | ||||
| #ifdef __KERNEL_SSE__ | # ifdef __KERNEL_SSE__ | ||||
| return float4(_mm_castsi128_ps(i.m128)); | return float4(_mm_castsi128_ps(i.m128)); | ||||
| #else | # else | ||||
| return make_float4( | return make_float4( | ||||
| __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); | __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); | ||||
| #endif | # endif | ||||
| } | } | ||||
| #endif /* !defined(__KERNEL_METAL__) */ | |||||
| #if defined(__KERNEL_METAL__) | |||||
| # define isnan_safe(v) isnan(v) | |||||
| # define isfinite_safe(v) isfinite(v) | |||||
| #else | |||||
| template<typename T> ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) | template<typename T> ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) | ||||
| { | { | ||||
| return ((uint64_t)ptr) & 0xFFFFFFFF; | return ((uint64_t)ptr) & 0xFFFFFFFF; | ||||
| } | } | ||||
| template<typename T> ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) | template<typename T> ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) | ||||
| { | { | ||||
| return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; | return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; | ||||
| Show All 27 Lines | |||||
| } | } | ||||
| ccl_device_inline bool isfinite_safe(float f) | ccl_device_inline bool isfinite_safe(float f) | ||||
| { | { | ||||
| /* By IEEE 754 rule, 2*Inf equals Inf */ | /* By IEEE 754 rule, 2*Inf equals Inf */ | ||||
| unsigned int x = __float_as_uint(f); | unsigned int x = __float_as_uint(f); | ||||
| return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); | return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); | ||||
| } | } | ||||
| #endif | |||||
| ccl_device_inline float ensure_finite(float v) | ccl_device_inline float ensure_finite(float v) | ||||
| { | { | ||||
| return isfinite_safe(v) ? v : 0.0f; | return isfinite_safe(v) ? v : 0.0f; | ||||
| } | } | ||||
| #if !defined(__KERNEL_METAL__) | |||||
| ccl_device_inline int clamp(int a, int mn, int mx) | ccl_device_inline int clamp(int a, int mn, int mx) | ||||
| { | { | ||||
| return min(max(a, mn), mx); | return min(max(a, mn), mx); | ||||
| } | } | ||||
| ccl_device_inline float clamp(float a, float mn, float mx) | ccl_device_inline float clamp(float a, float mn, float mx) | ||||
| { | { | ||||
| return min(max(a, mn), mx); | return min(max(a, mn), mx); | ||||
| Show All 13 Lines | else if (x >= edge1) | ||||
| result = 1.0f; | result = 1.0f; | ||||
| else { | else { | ||||
| float t = (x - edge0) / (edge1 - edge0); | float t = (x - edge0) / (edge1 - edge0); | ||||
| result = (3.0f - 2.0f * t) * (t * t); | result = (3.0f - 2.0f * t) * (t * t); | ||||
| } | } | ||||
| return result; | return result; | ||||
| } | } | ||||
| #ifndef __KERNEL_CUDA__ | #endif /* !defined(__KERNEL_METAL__) */ | ||||
| #if defined(__KERNEL_CUDA__) | |||||
| ccl_device_inline float saturatef(float a) | ccl_device_inline float saturatef(float a) | ||||
| { | { | ||||
| return clamp(a, 0.0f, 1.0f); | return __saturatef(a); | ||||
| } | } | ||||
| #else | #elif !defined(__KERNEL_METAL__) | ||||
| ccl_device_inline float saturatef(float a) | ccl_device_inline float saturatef(float a) | ||||
| { | { | ||||
| return __saturatef(a); | return clamp(a, 0.0f, 1.0f); | ||||
| } | } | ||||
| #endif /* __KERNEL_CUDA__ */ | #endif /* __KERNEL_CUDA__ */ | ||||
| ccl_device_inline int float_to_int(float f) | ccl_device_inline int float_to_int(float f) | ||||
| { | { | ||||
| return (int)f; | return (int)f; | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 120 Lines • ▼ Show 20 Lines | |||||
| #include "util/math_float2.h" | #include "util/math_float2.h" | ||||
| #include "util/math_float3.h" | #include "util/math_float3.h" | ||||
| #include "util/math_float4.h" | #include "util/math_float4.h" | ||||
| #include "util/rect.h" | #include "util/rect.h" | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
| #if !defined(__KERNEL_METAL__) | |||||
| /* Interpolation */ | /* Interpolation */ | ||||
/* Linear blend between `a` and `b`: `t == 0` selects `a`, `t == 1` selects `b`.
 * The weights are computed in type B and the result is converted back to type A. */
template<class A, class B> A lerp(const A &a, const A &b, const B &t)
{
  const B weight_a = (B)1 - t;
  return (A)(a * weight_a + b * t);
}
| #endif /* __KERNEL_METAL__ */ | |||||
| /* Triangle */ | /* Triangle */ | ||||
| ccl_device_inline float triangle_area(ccl_private const float3 &v1, | ccl_device_inline float triangle_area(ccl_private const float3 &v1, | ||||
| ccl_private const float3 &v2, | ccl_private const float3 &v2, | ||||
| ccl_private const float3 &v3) | ccl_private const float3 &v3) | ||||
| { | { | ||||
| return len(cross(v3 - v2, v1 - v2)) * 0.5f; | return len(cross(v3 - v2, v1 - v2)) * 0.5f; | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines | |||||
| ccl_device_inline float safe_sqrtf(float f) | ccl_device_inline float safe_sqrtf(float f) | ||||
| { | { | ||||
| return sqrtf(max(f, 0.0f)); | return sqrtf(max(f, 0.0f)); | ||||
| } | } | ||||
| ccl_device_inline float inversesqrtf(float f) | ccl_device_inline float inversesqrtf(float f) | ||||
| { | { | ||||
| #if defined(__KERNEL_METAL__) | |||||
| return (f > 0.0f) ? rsqrt(f) : 0.0f; | |||||
| #else | |||||
| return (f > 0.0f) ? 1.0f / sqrtf(f) : 0.0f; | return (f > 0.0f) ? 1.0f / sqrtf(f) : 0.0f; | ||||
| #endif | |||||
| } | } | ||||
| ccl_device float safe_asinf(float a) | ccl_device float safe_asinf(float a) | ||||
| { | { | ||||
| return asinf(clamp(a, -1.0f, 1.0f)); | return asinf(clamp(a, -1.0f, 1.0f)); | ||||
| } | } | ||||
| ccl_device float safe_acosf(float a) | ccl_device float safe_acosf(float a) | ||||
| ▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | ccl_device_inline float xor_signmask(float x, int y) | ||||
| return __int_as_float(__float_as_int(x) ^ y); | return __int_as_float(__float_as_int(x) ^ y); | ||||
| } | } | ||||
| ccl_device float bits_to_01(uint bits) | ccl_device float bits_to_01(uint bits) | ||||
| { | { | ||||
| return bits * (1.0f / (float)0xFFFFFFFF); | return bits * (1.0f / (float)0xFFFFFFFF); | ||||
| } | } | ||||
/* popcount(x): number of set bits in a 32-bit unsigned value.
 *
 * - CPU with a GNU-compatible compiler: maps to __builtin_popcount().
 * - Other CPU compilers: portable fallback below.
 * - GPU kernels other than Metal: maps to __popc().
 * - Metal: nothing is defined here. */
#if !defined(__KERNEL_GPU__)
#  if defined(__GNUC__)
#    define popcount(x) __builtin_popcount(x)
#  else
ccl_device_inline uint popcount(uint x)
{
  /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */
  /* Parallel bit count over the whole word. The input must not be masked beforehand
   * and the full sum must be returned, so that the result matches the
   * __builtin_popcount() / __popc() variants. */
  uint i = x;
  /* Each 2-bit field now holds the number of bits set in that field. */
  i = i - ((i >> 1) & 0x55555555);
  /* Sum neighboring 2-bit counts into 4-bit fields. */
  i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
  /* Sum 4-bit counts into bytes, then add up all bytes into the top byte. */
  i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
  return i;
}
#  endif
#elif !defined(__KERNEL_METAL__)
#  define popcount(x) __popc(x)
#endif
| ccl_device_inline uint count_leading_zeros(uint x) | ccl_device_inline uint count_leading_zeros(uint x) | ||||
| { | { | ||||
| #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) | #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) | ||||
| return __clz(x); | return __clz(x); | ||||
| #elif defined(__KERNEL_METAL__) | |||||
| return clz(x); | |||||
| #else | #else | ||||
| assert(x != 0); | assert(x != 0); | ||||
| # ifdef _MSC_VER | # ifdef _MSC_VER | ||||
| unsigned long leading_zero = 0; | unsigned long leading_zero = 0; | ||||
| _BitScanReverse(&leading_zero, x); | _BitScanReverse(&leading_zero, x); | ||||
| return (31 - leading_zero); | return (31 - leading_zero); | ||||
| # else | # else | ||||
| return __builtin_clz(x); | return __builtin_clz(x); | ||||
| # endif | # endif | ||||
| #endif | #endif | ||||
| } | } | ||||
| ccl_device_inline uint count_trailing_zeros(uint x) | ccl_device_inline uint count_trailing_zeros(uint x) | ||||
| { | { | ||||
| #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) | #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) | ||||
| return (__ffs(x) - 1); | return (__ffs(x) - 1); | ||||
| #elif defined(__KERNEL_METAL__) | |||||
| return ctz(x); | |||||
| #else | #else | ||||
| assert(x != 0); | assert(x != 0); | ||||
| # ifdef _MSC_VER | # ifdef _MSC_VER | ||||
| unsigned long ctz = 0; | unsigned long ctz = 0; | ||||
| _BitScanForward(&ctz, x); | _BitScanForward(&ctz, x); | ||||
| return ctz; | return ctz; | ||||
| # else | # else | ||||
| return __builtin_ctz(x); | return __builtin_ctz(x); | ||||
| # endif | # endif | ||||
| #endif | #endif | ||||
| } | } | ||||
| ccl_device_inline uint find_first_set(uint x) | ccl_device_inline uint find_first_set(uint x) | ||||
| { | { | ||||
| #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) | #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) | ||||
| return __ffs(x); | return __ffs(x); | ||||
| #elif defined(__KERNEL_METAL__) | |||||
| return (x != 0) ? ctz(x) + 1 : 0; | |||||
| #else | #else | ||||
| # ifdef _MSC_VER | # ifdef _MSC_VER | ||||
| return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0; | return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0; | ||||
| # else | # else | ||||
| return __builtin_ffs(x); | return __builtin_ffs(x); | ||||
| # endif | # endif | ||||
| #endif | #endif | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | |||||
| ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) | ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) | ||||
| { | { | ||||
| /* Use a native instruction if it exists. */ | /* Use a native instruction if it exists. */ | ||||
| #if defined(__arm__) || defined(__aarch64__) | #if defined(__arm__) || defined(__aarch64__) | ||||
| __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); | __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); | ||||
| return x; | return x; | ||||
| #elif defined(__KERNEL_CUDA__) | #elif defined(__KERNEL_CUDA__) | ||||
| return __brev(x); | return __brev(x); | ||||
| #elif defined(__KERNEL_METAL__) | |||||
| return reverse_bits(x); | |||||
| #elif __has_builtin(__builtin_bitreverse32) | #elif __has_builtin(__builtin_bitreverse32) | ||||
| return __builtin_bitreverse32(x); | return __builtin_bitreverse32(x); | ||||
| #else | #else | ||||
| /* Flip pairwise. */ | /* Flip pairwise. */ | ||||
| x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); | x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); | ||||
| /* Flip pairs. */ | /* Flip pairs. */ | ||||
| x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); | x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); | ||||
| /* Flip nibbles. */ | /* Flip nibbles. */ | ||||
| Show All 16 Lines | |||||