Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/device/metal/compat.h
| Show All 28 Lines | |||||
| #include <metal_pack> | #include <metal_pack> | ||||
| #include <metal_stdlib> | #include <metal_stdlib> | ||||
| #include <simd/simd.h> | #include <simd/simd.h> | ||||
| using namespace metal; | using namespace metal; | ||||
| #pragma clang diagnostic ignored "-Wunused-variable" | #pragma clang diagnostic ignored "-Wunused-variable" | ||||
| #pragma clang diagnostic ignored "-Wsign-compare" | #pragma clang diagnostic ignored "-Wsign-compare" | ||||
| #pragma clang diagnostic ignored "-Wuninitialized" | |||||
| /* Qualifiers */ | /* Qualifiers */ | ||||
| #define ccl_device | #define ccl_device | ||||
| #define ccl_device_inline ccl_device | #define ccl_device_inline ccl_device | ||||
| #define ccl_device_forceinline ccl_device | #define ccl_device_forceinline ccl_device | ||||
| #define ccl_device_noinline ccl_device __attribute__((noinline)) | #define ccl_device_noinline ccl_device __attribute__((noinline)) | ||||
| #define ccl_device_noinline_cpu ccl_device | #define ccl_device_noinline_cpu ccl_device | ||||
| #define ccl_device_inline_method ccl_device | #define ccl_device_inline_method ccl_device | ||||
| #define ccl_global device | #define ccl_global device | ||||
| #define ccl_static_constant static constant constexpr | #define ccl_inline_constant static constant constexpr | ||||
| #define ccl_device_constant constant | #define ccl_device_constant constant | ||||
| #define ccl_constant const device | #define ccl_constant const device | ||||
| #define ccl_gpu_shared threadgroup | #define ccl_gpu_shared threadgroup | ||||
| #define ccl_private thread | #define ccl_private thread | ||||
| #define ccl_may_alias | #define ccl_may_alias | ||||
| #define ccl_restrict __restrict | #define ccl_restrict __restrict | ||||
| #define ccl_loop_no_unroll | #define ccl_loop_no_unroll | ||||
| #define ccl_align(n) alignas(n) | #define ccl_align(n) alignas(n) | ||||
| #define ccl_optional_struct_init | #define ccl_optional_struct_init | ||||
| /* No assert supported for Metal */ | /* No assert supported for Metal */ | ||||
| #define kernel_assert(cond) | #define kernel_assert(cond) | ||||
| #define ccl_gpu_global_id_x() metal_global_id | #define ccl_gpu_global_id_x() metal_global_id | ||||
| #define ccl_gpu_warp_size simdgroup_size | #define ccl_gpu_warp_size simdgroup_size | ||||
| #define ccl_gpu_thread_idx_x simd_group_index | #define ccl_gpu_thread_idx_x simd_group_index | ||||
| #define ccl_gpu_thread_mask(thread_warp) uint64_t((1ull << thread_warp) - 1) | #define ccl_gpu_thread_mask(thread_warp) uint64_t((1ull << thread_warp) - 1) | ||||
| #define ccl_gpu_ballot(predicate) ((uint64_t)((simd_vote::vote_t)simd_ballot(predicate))) | #define ccl_gpu_ballot(predicate) ((uint64_t)((simd_vote::vote_t)simd_ballot(predicate))) | ||||
| #define ccl_gpu_popc(x) popcount(x) | #define ccl_gpu_syncthreads() threadgroup_barrier(mem_flags::mem_threadgroup); | ||||
| // clang-format off | // clang-format off | ||||
| /* kernel.h adapters */ | /* kernel.h adapters */ | ||||
| #define ccl_gpu_kernel(block_num_threads, thread_num_registers) | #define ccl_gpu_kernel(block_num_threads, thread_num_registers) | ||||
| #define ccl_gpu_kernel_threads(block_num_threads) | #define ccl_gpu_kernel_threads(block_num_threads) | ||||
| ▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \ | ||||
| const uint metal_global_id [[thread_position_in_grid]], \ | const uint metal_global_id [[thread_position_in_grid]], \ | ||||
| const ushort metal_local_id [[thread_position_in_threadgroup]], \ | const ushort metal_local_id [[thread_position_in_threadgroup]], \ | ||||
| const ushort metal_local_size [[threads_per_threadgroup]], \ | const ushort metal_local_size [[threads_per_threadgroup]], \ | ||||
| uint simdgroup_size [[threads_per_simdgroup]], \ | uint simdgroup_size [[threads_per_simdgroup]], \ | ||||
| uint simd_lane_index [[thread_index_in_simdgroup]], \ | uint simd_lane_index [[thread_index_in_simdgroup]], \ | ||||
| uint simd_group_index [[simdgroup_index_in_threadgroup]], \ | uint simd_group_index [[simdgroup_index_in_threadgroup]], \ | ||||
| uint num_simd_groups [[simdgroups_per_threadgroup]]) { \ | uint num_simd_groups [[simdgroups_per_threadgroup]]) { \ | ||||
| MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \ | MetalKernelContext context(_launch_params_metal, _metal_ancillaries); \ | ||||
| INIT_DEBUG_BUFFER \ | |||||
| params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \ | params_struct->run(context, simdgroup_offset, metal_global_id, metal_local_id, metal_local_size, simdgroup_size, simd_lane_index, simd_group_index, num_simd_groups); \ | ||||
| } \ | } \ | ||||
| void kernel_gpu_##name::run(thread MetalKernelContext& context, \ | void kernel_gpu_##name::run(thread MetalKernelContext& context, \ | ||||
| threadgroup int *simdgroup_offset, \ | threadgroup int *simdgroup_offset, \ | ||||
| const uint metal_global_id, \ | const uint metal_global_id, \ | ||||
| const ushort metal_local_id, \ | const ushort metal_local_id, \ | ||||
| const ushort metal_local_size, \ | const ushort metal_local_size, \ | ||||
| uint simdgroup_size, \ | uint simdgroup_size, \ | ||||
| ▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | |||||
| #define hypotf(x, y) hypot(float(x), float(y)) | #define hypotf(x, y) hypot(float(x), float(y)) | ||||
| #define atan2f(x, y) atan2(float(x), float(y)) | #define atan2f(x, y) atan2(float(x), float(y)) | ||||
| #define fmaxf(x, y) fmax(float(x), float(y)) | #define fmaxf(x, y) fmax(float(x), float(y)) | ||||
| #define fminf(x, y) fmin(float(x), float(y)) | #define fminf(x, y) fmin(float(x), float(y)) | ||||
| #define fmodf(x, y) fmod(float(x), float(y)) | #define fmodf(x, y) fmod(float(x), float(y)) | ||||
| #define sinhf(x) sinh(float(x)) | #define sinhf(x) sinh(float(x)) | ||||
| #define coshf(x) cosh(float(x)) | #define coshf(x) cosh(float(x)) | ||||
| #define tanhf(x) tanh(float(x)) | #define tanhf(x) tanh(float(x)) | ||||
| #define saturatef(x) saturate(float(x)) | |||||
| /* Use native functions with possibly lower precision for performance, | /* Use native functions with possibly lower precision for performance, | ||||
| * no issues found so far. */ | * no issues found so far. */ | ||||
| #define trigmode fast | #define trigmode fast | ||||
| #define sinf(x) trigmode::sin(float(x)) | #define sinf(x) trigmode::sin(float(x)) | ||||
| #define cosf(x) trigmode::cos(float(x)) | #define cosf(x) trigmode::cos(float(x)) | ||||
| #define tanf(x) trigmode::tan(float(x)) | #define tanf(x) trigmode::tan(float(x)) | ||||
| #define expf(x) trigmode::exp(float(x)) | #define expf(x) trigmode::exp(float(x)) | ||||
| #define sqrtf(x) trigmode::sqrt(float(x)) | #define sqrtf(x) trigmode::sqrt(float(x)) | ||||
| #define logf(x) trigmode::log(float(x)) | #define logf(x) trigmode::log(float(x)) | ||||
| #define NULL 0 | #define NULL 0 | ||||
| #define __device__ | |||||
| /* texture bindings and sampler setup */ | /* texture bindings and sampler setup */ | ||||
| struct Texture2DParamsMetal { | struct Texture2DParamsMetal { | ||||
| texture2d<float, access::sample> tex; | texture2d<float, access::sample> tex; | ||||
| }; | }; | ||||
| struct Texture3DParamsMetal { | struct Texture3DParamsMetal { | ||||
| texture3d<float, access::sample> tex; | texture3d<float, access::sample> tex; | ||||
| }; | }; | ||||
| struct MetalAncillaries { | struct MetalAncillaries { | ||||
| device Texture2DParamsMetal *textures_2d; | device Texture2DParamsMetal *textures_2d; | ||||
| device Texture3DParamsMetal *textures_3d; | device Texture3DParamsMetal *textures_3d; | ||||
| }; | }; | ||||
| #include "util/half.h" | |||||
| #include "util/types.h" | |||||
| enum SamplerType { | enum SamplerType { | ||||
| SamplerFilterNearest_AddressRepeat, | SamplerFilterNearest_AddressRepeat, | ||||
| SamplerFilterNearest_AddressClampEdge, | SamplerFilterNearest_AddressClampEdge, | ||||
| SamplerFilterNearest_AddressClampZero, | SamplerFilterNearest_AddressClampZero, | ||||
| SamplerFilterLinear_AddressRepeat, | SamplerFilterLinear_AddressRepeat, | ||||
| SamplerFilterLinear_AddressClampEdge, | SamplerFilterLinear_AddressClampEdge, | ||||
| SamplerFilterLinear_AddressClampZero, | SamplerFilterLinear_AddressClampZero, | ||||
| Show All 12 Lines | |||||