Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/util/half.h
| Show All 22 Lines | |||||
| #if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) | #if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) | ||||
| # include "util/simd.h" | # include "util/simd.h" | ||||
| #endif | #endif | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
| /* Half Floats */ | /* Half Floats */ | ||||
| #if defined(__KERNEL_METAL__) | |||||
/* Convert a `half` to `float` by assembling the 32-bit pattern directly.
 * Only compiled when __KERNEL_METAL__ is defined (see the enclosing #if).
 *
 * The half is reinterpreted as a 16-bit integer through a union, then:
 *  - the sign bit (0x8000) is moved up by 16 to bit 31,
 *  - the exponent field (0x7c00) has 0x1C000 (112 << 10) added and is moved up by 13,
 *  - the mantissa (0x03FF) is moved up by 13.
 *
 * NOTE(review): the 112 offset looks like the float/half exponent bias difference (127 - 15).
 * The offset is added unconditionally; exponent fields 0 and 0x7c00 are not special-cased
 * here, so confirm callers do not rely on zero/denormal/inf/NaN being handled. */
| ccl_device_inline float half_to_float(half h_in) | |||||
| { | |||||
| float f; | |||||
/* Union used to read the raw 16 bits of the incoming half. */
| union { | |||||
| half h; | |||||
| uint16_t s; | |||||
| } val; | |||||
| val.h = h_in; | |||||
/* The assembled bits are stored into `f` through an int pointer cast rather than returned
 * through a conversion helper. */
| *((ccl_private int *)&f) = ((val.s & 0x8000) << 16) | (((val.s & 0x7c00) + 0x1C000) << 13) | | |||||
| ((val.s & 0x03FF) << 13); | |||||
| return f; | |||||
| } | |||||
| #else | |||||
| /* CUDA and HIP have their own half data type, so there is no need to define one. */ | /* CUDA and HIP have their own half data type, so there is no need to define one. */ | ||||
| #if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) | # if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) | ||||
| /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from | /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from | ||||
| * unsigned shorts. */ | * unsigned shorts. */ | ||||
/* Thin wrapper around a single `unsigned short` holding the raw 16-bit value.
 * Only defined when neither __KERNEL_CUDA__ nor __KERNEL_HIP__ is set (see the enclosing
 * #if). No arithmetic is provided; the class only stores and hands back the bits. */
| class half { | class half { | ||||
| public: | public: | ||||
/* Default construction stores 0. */
| half() : v(0) | half() : v(0) | ||||
| { | { | ||||
| } | } | ||||
/* Non-explicit constructor: an unsigned short converts implicitly to half. */
| half(const unsigned short &i) : v(i) | half(const unsigned short &i) : v(i) | ||||
| { | { | ||||
| } | } | ||||
/* Implicit conversion back to the stored unsigned short (not const-qualified). */
| operator unsigned short() | operator unsigned short() | ||||
| { | { | ||||
| return v; | return v; | ||||
| } | } | ||||
/* Assignment from an unsigned short overwrites the stored value. */
| half &operator=(const unsigned short &i) | half &operator=(const unsigned short &i) | ||||
| { | { | ||||
| v = i; | v = i; | ||||
| return *this; | return *this; | ||||
| } | } | ||||
| private: | private: | ||||
/* The stored 16-bit value. */
| unsigned short v; | unsigned short v; | ||||
| }; | }; | ||||
| #endif | # endif | ||||
/* Four `half` values named x, y, z, w. */
| struct half4 { | struct half4 { | ||||
| half x, y, z, w; | half x, y, z, w; | ||||
| }; | }; | ||||
| #endif | |||||
| /* Conversion to/from half float for image textures | /* Conversion to/from half float for image textures | ||||
| * | * | ||||
| * Simplified float to half for fast sampling on processor without a native | * Simplified float to half for fast sampling on processor without a native | ||||
| * instruction, and eliminating any NaN and inf values. */ | * instruction, and eliminating any NaN and inf values. */ | ||||
| ccl_device_inline half float_to_half_image(float f) | ccl_device_inline half float_to_half_image(float f) | ||||
| { | { | ||||
| #if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) | #if defined(__KERNEL_METAL__) | ||||
| return half(f); | |||||
| #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) | |||||
| return __float2half(f); | return __float2half(f); | ||||
| #else | #else | ||||
| const uint u = __float_as_uint(f); | const uint u = __float_as_uint(f); | ||||
| /* Sign bit, shifted to its position. */ | /* Sign bit, shifted to its position. */ | ||||
| uint sign_bit = u & 0x80000000; | uint sign_bit = u & 0x80000000; | ||||
| sign_bit >>= 16; | sign_bit >>= 16; | ||||
| /* Exponent. */ | /* Exponent. */ | ||||
| uint exponent_bits = u & 0x7f800000; | uint exponent_bits = u & 0x7f800000; | ||||
| Show All 9 Lines | #else | ||||
| value_bits = (exponent_bits == 0 ? 0 : value_bits); | value_bits = (exponent_bits == 0 ? 0 : value_bits); | ||||
| /* Re-insert sign bit and return. */ | /* Re-insert sign bit and return. */ | ||||
| return (value_bits | sign_bit); | return (value_bits | sign_bit); | ||||
| #endif | #endif | ||||
| } | } | ||||
/* Convert a `half` to `float`, choosing the implementation by preprocessor define.
 *
 * In the right-hand (new) column:
 *  - __KERNEL_METAL__: forwards to half_to_float().
 *  - __KERNEL_CUDA__ / __KERNEL_HIP__: uses __half2float().
 *  - otherwise: builds the float bit pattern with integer operations and reinterprets it
 *    through __int_as_float().
 * The left-hand (old) column has only the CUDA/HIP and fallback paths. */
| ccl_device_inline float half_to_float_image(half h) | ccl_device_inline float half_to_float_image(half h) | ||||
| { | { | ||||
| #if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) | #if defined(__KERNEL_METAL__) | ||||
| return half_to_float(h); | |||||
| #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) | |||||
| return __half2float(h); | return __half2float(h); | ||||
| #else | #else | ||||
/* Sign bit to bit 31; exponent field plus 0x1C000 (112 << 10) shifted up by 13; mantissa
 * shifted up by 13.
 * NOTE(review): the offset is added even when the exponent field is 0, so h == 0 yields the
 * bit pattern 0x38000000 rather than 0. Confirm this is acceptable for callers. */
| const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); | const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); | ||||
| return __int_as_float(x); | return __int_as_float(x); | ||||
| #endif | #endif | ||||
| } | } | ||||
| ccl_device_inline float4 half4_to_float4_image(const half4 h) | ccl_device_inline float4 half4_to_float4_image(const half4 h) | ||||
| Show All 16 Lines | |||||
| /* Conversion to half float texture for display. | /* Conversion to half float texture for display. | ||||
| * | * | ||||
| * Simplified float to half for fast display texture conversion on processors | * Simplified float to half for fast display texture conversion on processors | ||||
| * without a native instruction. Assumes no negative, no NaN, no inf, and sets | * without a native instruction. Assumes no negative, no NaN, no inf, and sets | ||||
| * denormal to 0. */ | * denormal to 0. */ | ||||
| ccl_device_inline half float_to_half_display(const float f) | ccl_device_inline half float_to_half_display(const float f) | ||||
| { | { | ||||
| #if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) | #if defined(__KERNEL_METAL__) | ||||
| return half(f); | |||||
| #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) | |||||
| return __float2half(f); | return __float2half(f); | ||||
| #else | #else | ||||
| const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f); | const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f); | ||||
| const int absolute = x & 0x7FFFFFFF; | const int absolute = x & 0x7FFFFFFF; | ||||
| const int Z = absolute + 0xC8000000; | const int Z = absolute + 0xC8000000; | ||||
| const int result = (absolute < 0x38800000) ? 0 : Z; | const int result = (absolute < 0x38800000) ? 0 : Z; | ||||
| const int rshift = (result >> 13); | const int rshift = (result >> 13); | ||||
| return (rshift & 0x7FFF); | return (rshift & 0x7FFF); | ||||
| Show All 33 Lines | |||||