source/blender/gpu/shaders/metal/mtl_shader_defines.msl
- This file was added.
| /* SPDX-License-Identifier: GPL-2.0-or-later */ | |||||
| /** Special header for mapping commonly defined tokens to API-specific variations. | |||||
| * Where possible, this will adhere closely to base GLSL, where semantics are the same. | |||||
| * However, host shader code may need modification to support types where necessary variations | |||||
| * exist between APIs but are not expressed through the source (e.g. the distinction between | |||||
| * depth2d and texture2d types in Metal). | |||||
| */ | |||||
| /* Base instance with offsets. */ | |||||
| #define gpu_BaseInstance gl_BaseInstanceARB | |||||
| #define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance) | |||||
| /* Derivative signs. */ | |||||
| #define DFDX_SIGN 1.0 | |||||
| #define DFDY_SIGN 1.0 | |||||
| /* Type definitions. */ | |||||
| #define vec2 float2 | |||||
| #define vec3 float3 | |||||
| #define vec4 float4 | |||||
| #define mat2 float2x2 | |||||
| #define mat2x2 float2x2 | |||||
| #define mat3 float3x3 | |||||
| #define mat4 float4x4 | |||||
| #define ivec2 int2 | |||||
fclem: We might want to be exhaustive for the matrix types because of pyGPU. It is missing all of the non-symmetric cases and the aliases to mat3x3 and mat4x4.
| #define ivec3 int3 | |||||
| #define ivec4 int4 | |||||
| #define uvec2 uint2 | |||||
| #define uvec3 uint3 | |||||
| #define uvec4 uint4 | |||||
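For reference, a hedged sketch of the extra aliases fclem's comment a few lines up asks for. These defines are not part of this diff, and the exact set Blender needs would have to be confirmed against what pyGPU shaders actually use; GLSL's matCxR (C columns, R rows) maps directly onto MSL's floatCxR.

  /* Hypothetical sketch only -- not part of this patch. */
  #define mat3x3 float3x3
  #define mat4x4 float4x4
  #define mat2x3 float2x3
  #define mat2x4 float2x4
  #define mat3x2 float3x2
  #define mat3x4 float3x4
  #define mat4x2 float4x2
  #define mat4x3 float4x3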
| /* MTLBOOL is used for native booleans generated by the Metal backend, to avoid type-emulation | |||||
| * for GLSL bools, which are treated as integers. */ | |||||
| #define MTLBOOL bool | |||||
| #define bool int | |||||
| #define bvec2 bool2 | |||||
| #define bvec3 bool3 | |||||
| #define bvec4 bool4 | |||||
| #define vec3_1010102_Unorm uint | |||||
| #define vec3_1010102_Inorm int | |||||
| /* Strip GLSL Decorators. */ | |||||
| #define in | |||||
| #define flat | |||||
| #define smooth | |||||
| #define noperspective | |||||
| #define layout(std140) struct | |||||
| #define uniform | |||||
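A worked expansion of the two defines above may help: they turn a GLSL std140 interface block declaration into a plain MSL struct. The block and member names below are hypothetical.

  /* GLSL:  layout(std140) uniform TestBlock { vec4 color; };
   * After `layout(std140)` -> `struct` and `uniform` -> (nothing), the MSL compiler sees:
   *         struct TestBlock { vec4 color; };
   * (with `vec4` further mapping to `float4` via the type defines above). */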
| /* Used to replace 'out' in function parameters with a thread-local reference, | |||||
| * shortened to avoid expanding the GLSL source string. */ | |||||
| #define THD thread | |||||
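A minimal sketch of the intent, assuming the backend's source translation rewrites GLSL `out` parameters into `THD` references; `safe_divide` below is a hypothetical example, not a function from this patch.

  /* GLSL:  void safe_divide(float a, float b, out float result) { ... }
   * Expected translated form: */
  inline void safe_divide(float a, float b, THD float &result)
  {
    /* `THD float &result` expands to `thread float &result`. */
    result = (b != 0.0) ? (a / b) : 0.0;
  }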
| /* Generate wrapper structs for combined texture and sampler type. */ | |||||
| #ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS | |||||
| # define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ | |||||
| template<typename T, access A = access::sample> struct STRUCT_NAME { \ | |||||
| thread TEX_TYPE<T, A> *texture; \ | |||||
| constant sampler *samp; \ | |||||
| } | |||||
| #else | |||||
| # define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ | |||||
| template<typename T, access A = access::sample> struct STRUCT_NAME { \ | |||||
| thread TEX_TYPE<T, A> *texture; \ | |||||
| thread sampler *samp; \ | |||||
| } | |||||
| #endif | |||||
| /* Add any types as needed. */ | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array); | |||||
| COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array); | |||||
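For readers unfamiliar with the macro above, this is roughly what one instantiation expands to in the non-argument-buffer path: each GLSL combined sampler becomes a texture pointer plus a pointer to its sampler state.

  /* COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d) expands to roughly:
   *
   *   template<typename T, access A = access::sample> struct _mtl_combined_image_sampler_2d {
   *     thread texture2d<T, A> *texture;
   *     thread sampler *samp;
   *   };
   */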
| /* Sampler struct for argument buffer. */ | |||||
| #ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS | |||||
| struct SStruct { | |||||
| array<sampler, ARGUMENT_BUFFER_NUM_SAMPLERS> sampler_args [[id(0)]]; | |||||
| }; | |||||
| #endif | |||||
| /* Samplers as function parameters. */ | |||||
| #define sampler1D thread _mtl_combined_image_sampler_1d<float> | |||||
| #define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float> | |||||
| #define sampler2D thread _mtl_combined_image_sampler_2d<float> | |||||
| #define depth2D thread _mtl_combined_image_sampler_depth_2d<float> | |||||
| #define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float> | |||||
| #define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> | |||||
| #define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> | |||||
| #define sampler3D thread _mtl_combined_image_sampler_3d<float> | |||||
| #define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read> | |||||
| #define samplerCube thread _mtl_combined_image_sampler_cube<float> | |||||
| #define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float> | |||||
| #define usampler1D thread _mtl_combined_image_sampler_1d<uint> | |||||
| #define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint> | |||||
| #define usampler2D thread _mtl_combined_image_sampler_2d<uint> | |||||
| #define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint> | |||||
| #define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint> | |||||
| #define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> | |||||
| #define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> | |||||
| #define usampler3D thread _mtl_combined_image_sampler_3d<uint> | |||||
| #define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read> | |||||
| #define usamplerCube thread _mtl_combined_image_sampler_cube<uint> | |||||
| #define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint> | |||||
| #define isampler1D thread _mtl_combined_image_sampler_1d<int> | |||||
| #define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int> | |||||
| #define isampler2D thread _mtl_combined_image_sampler_2d<int> | |||||
| #define idepth2D thread _mtl_combined_image_sampler_depth_2d<int> | |||||
| #define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int> | |||||
| #define isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> | |||||
| #define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> | |||||
| #define isampler3D thread _mtl_combined_image_sampler_3d<int> | |||||
| #define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read> | |||||
| #define isamplerCube thread _mtl_combined_image_sampler_cube<int> | |||||
| #define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int> | |||||
| /* Vector accessor aliases. */ | |||||
| #define st xy | |||||
| /* Texture functions. */ | |||||
| #define texelFetch _texelFetch_internal | |||||
| #define texelFetchOffset(__tex, __texel, __lod, __offset) \ | |||||
| _texelFetch_internal(__tex, __texel, __lod, __offset) | |||||
| #define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv) | |||||
| #define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias))) | |||||
| #define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod))) | |||||
| #define textureLodOffset(__tex, __uv, __lod, __offset) \ | |||||
| _texture_internal_level(__tex, __uv, level(float(__lod)), __offset) | |||||
| #define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0) | |||||
| #define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp) | |||||
| #define textureGatherOffset(__tex, __offset, __uv, __comp) \ | |||||
| _texture_gather_internal(__tex, __uv, __comp, __offset) | |||||
| #define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME | |||||
| #define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__) | |||||
| #define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__) | |||||
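A short worked expansion of the arity-selection trick above, since it is easy to misread: the fourth argument handed to TEXURE_MACRO is always the name matching the number of user arguments.

  /* texture(t, uv)       -> TEXURE_MACRO(t, uv, texture3, texture2)(t, uv)
   *                       -> texture2(t, uv)       -> _texture_internal_samp(t, uv)
   * texture(t, uv, -1.0)  -> TEXURE_MACRO(t, uv, -1.0, texture3, texture2)(t, uv, -1.0)
   *                       -> texture3(t, uv, -1.0) -> _texture_internal_bias(t, uv, bias(float(-1.0)))
   */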
| /* Texture-write functions. */ | |||||
| #define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value) | |||||
| /* Singular return values from texture functions of type DEPTH are often indexed with either .r or | |||||
| * .x. This is a lightweight wrapper type for handling this syntax. */ | |||||
| union _msl_return_float { | |||||
| float r; | |||||
| float x; | |||||
| inline operator float() const | |||||
| { | |||||
| return r; | |||||
| } | |||||
| }; | |||||
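An illustration of why the union is useful; `shadow_map` and `uv` are hypothetical names, and the depth-sampling overloads this relies on are defined further down in the file.

  /* Translated GLSL keeps whichever spelling it already used:
   *   float d0 = texture(shadow_map, uv).r;
   *   float d1 = texture(shadow_map, uv).x;
   *   float d2 = texture(shadow_map, uv);   // Via the implicit operator float().
   * All three resolve to the same scalar depth value. */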
| /* Add custom texture sampling/reading routines for each type to account for special return cases, | |||||
| * e.g. returning a float with an r parameter. Note: Cannot use template specialization for input | |||||
| * type, as return types are specific to the signature of 'tex'. */ | |||||
| /* Texture Read. */ | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, T texel) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| if (texel >= 0 && texel < w) { | |||||
| return tex.texture->read(uint(texel)); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T> | |||||
| inline vec<S, 4> _texelFetch_internal( | |||||
| const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| if (texel >= 0 && texel < w) { | |||||
| return tex.texture->read(uint(texel)); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, | |||||
| T texel, | |||||
| uint lod, | |||||
| T offset = 0) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| if ((texel + offset) >= 0 && (texel + offset) < w) { | |||||
| /* LODs not supported for 1d textures. This must be zero. */ | |||||
| return tex.texture->read(uint(texel + offset), 0); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, | |||||
| vec<T, 1> texel, | |||||
| uint lod, | |||||
| vec<T, 1> offset = 0) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| if ((texel + offset) >= 0 && (texel + offset) < w) { | |||||
| /* LODs not supported for 1d textures. This must be zero. */ | |||||
| return tex.texture->read(uint(texel + offset), 0); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, int n, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, | |||||
| vec<T, n> texel, | |||||
| uint lod, | |||||
| vec<T, n> offset = vec<T, n>(0)) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w) { | |||||
| /* LODs not supported for 1d textures. This must be zero. */ | |||||
| return tex.texture->read(uint(texel.x + offset.x), 0); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex, | |||||
| vec<T, 2> texel, | |||||
| uint lod, | |||||
| vec<T, 2> offset = vec<T, 2>(0, 0)) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| float h = tex.texture->get_array_size(); | |||||
| if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && | |||||
| (texel.y + offset.y) < h) { | |||||
| /* LODs not supported for 1d textures. This must be zero. */ | |||||
| return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, | |||||
| vec<T, 2> texel, | |||||
| uint lod, | |||||
| vec<T, 2> offset = vec<T, 2>(0)) | |||||
| { | |||||
| float w = tex.texture->get_width() >> lod; | |||||
| float h = tex.texture->get_height() >> lod; | |||||
| if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && | |||||
| (texel.y + offset.y) < h) { | |||||
| return tex.texture->read(uint2(texel + offset), lod); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex, | |||||
| vec<T, 3> texel, | |||||
| uint lod, | |||||
| vec<T, 3> offset = vec<T, 3>(0)) | |||||
| { | |||||
| float w = tex.texture->get_width() >> lod; | |||||
| float h = tex.texture->get_height() >> lod; | |||||
| float d = tex.texture->get_array_size(); | |||||
| if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && | |||||
| (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { | |||||
| return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, | |||||
| vec<T, 3> texel, | |||||
| uint lod, | |||||
| vec<T, 3> offset = vec<T, 3>(0)) | |||||
| { | |||||
| float w = tex.texture->get_width() >> lod; | |||||
| float h = tex.texture->get_height() >> lod; | |||||
| float d = tex.texture->get_depth() >> lod; | |||||
| if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && | |||||
| (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { | |||||
| return tex.texture->read(uint3(texel + offset), lod); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| template<typename T, access A> | |||||
| inline _msl_return_float _texelFetch_internal( | |||||
| thread _mtl_combined_image_sampler_depth_2d<float, A> tex, | |||||
| vec<T, 2> texel, | |||||
| uint lod, | |||||
| vec<T, 2> offset = vec<T, 2>(0)) | |||||
| { | |||||
| float w = tex.texture->get_width() >> lod; | |||||
| float h = tex.texture->get_height() >> lod; | |||||
| if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && | |||||
| (texel.y + offset.y) < h) { | |||||
| _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)}; | |||||
| return fl; | |||||
| } | |||||
| else { | |||||
| _msl_return_float fl = {0}; | |||||
| return fl; | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex, | |||||
| vec<T, 3> texel, | |||||
| uint lod, | |||||
| vec<T, 3> offset = vec<T, 3>(0)) | |||||
| { | |||||
| float w = tex.texture->get_width() >> lod; | |||||
| float h = tex.texture->get_height() >> lod; | |||||
| float d = tex.texture->get_array_size(); | |||||
| if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && | |||||
| (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { | |||||
| return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); | |||||
| } | |||||
| else { | |||||
| return vec<S, 4>(0); | |||||
| } | |||||
| } | |||||
| /* Sample. */ | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, u); | |||||
| } | |||||
| inline float4 _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); | |||||
| } | |||||
| inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, | |||||
| float2 uv) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uv); | |||||
| } | |||||
| inline uint4 _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uv); | |||||
| } | |||||
| inline float4 _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uv); | |||||
| } | |||||
| inline _msl_return_float _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv) | |||||
| { | |||||
| _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)}; | |||||
| return fl; | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uvw); | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z)); | |||||
| } | |||||
| inline _msl_return_float _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva) | |||||
| { | |||||
| _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))}; | |||||
| return fl; | |||||
| } | |||||
| inline _msl_return_float _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac) | |||||
| { | |||||
| _msl_return_float fl = { | |||||
| tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))}; | |||||
| return fl; | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uvs.xyz); | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_samp( | |||||
| thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w)); | |||||
| } | |||||
| /* Sample Level. */ | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_1d<T, access::sample> tex, | |||||
| float u, | |||||
| level options, | |||||
| int offset = 0) | |||||
| { | |||||
| /* LODs not supported for 1d textures. This must be zero. */ | |||||
| return tex.texture->sample(*tex.samp, u); | |||||
| } | |||||
| inline float4 _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, | |||||
| float2 ua, | |||||
| level options, | |||||
| int offset = 0) | |||||
| { | |||||
| /* LODs not supported for 1d textures. This must be zero. */ | |||||
| return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); | |||||
| } | |||||
| inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, | |||||
| float2 uv, | |||||
| level options, | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uv, options, offset); | |||||
| } | |||||
| inline uint4 _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, | |||||
| float2 uv, | |||||
| level options, | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uv, options, offset); | |||||
| } | |||||
| inline float4 _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_2d<float, access::sample> tex, | |||||
| float2 uv, | |||||
| level options, | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uv, options, offset); | |||||
| } | |||||
| inline _msl_return_float _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, | |||||
| float2 uv, | |||||
| level options, | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; | |||||
| return fl; | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_3d<T, access::sample> tex, | |||||
| float3 uvw, | |||||
| level options = level(0), | |||||
| int3 offset = int3(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uvw, options, offset); | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, | |||||
| float3 uva, | |||||
| level options = level(0), | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset); | |||||
| } | |||||
| inline _msl_return_float _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, | |||||
| float3 uva, | |||||
| level options = level(0), | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)}; | |||||
| return fl; | |||||
| } | |||||
| inline _msl_return_float _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, | |||||
| float4 uvac, | |||||
| level options = level(0), | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| _msl_return_float fl = { | |||||
| tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)}; | |||||
| return fl; | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_cube<T, access::sample> tex, | |||||
| float3 uvs, | |||||
| level options = level(0), | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uvs.xyz, options); | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_level( | |||||
| thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, | |||||
| float4 coord_a, | |||||
| level options = level(0), | |||||
| int3 offset = int3(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options); | |||||
| } | |||||
| /* Sample Bias. */ | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_internal_bias( | |||||
| thread _mtl_combined_image_sampler_1d<T, access::sample> tex, | |||||
| float u, | |||||
| bias options = bias(0.0), | |||||
| int offset = 0) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, u); | |||||
| } | |||||
| inline float4 _texture_internal_bias( | |||||
| thread _mtl_combined_image_sampler_2d<float, access::sample> tex, | |||||
| float2 uv, | |||||
| bias options = bias(0.0), | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->sample(*tex.samp, uv, options, offset); | |||||
| } | |||||
| inline _msl_return_float _texture_internal_bias( | |||||
| thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, | |||||
| float2 uv, | |||||
| bias options = bias(0), | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; | |||||
| return fl; | |||||
| } | |||||
| /* Texture Gather. */ | |||||
| component int_to_component(const int comp) | |||||
| { | |||||
| switch (comp) { | |||||
| default: | |||||
| case 0: | |||||
| return component::x; | |||||
| case 1: | |||||
| return component::y; | |||||
| case 2: | |||||
| return component::z; | |||||
| case 3: | |||||
| return component::w; | |||||
| } | |||||
| return component::x; | |||||
| } | |||||
| inline float4 _texture_gather_internal( | |||||
| thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, | |||||
| float2 uv, | |||||
| const int comp = 0, | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->gather(*tex.samp, uv, offset); | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_gather_internal( | |||||
| thread _mtl_combined_image_sampler_2d<T, access::sample> tex, | |||||
| float2 uv, | |||||
| const int comp = 0, | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->gather(*tex.samp, uv, offset); | |||||
| } | |||||
| template<typename T> | |||||
| inline vec<T, 4> _texture_gather_internal( | |||||
| thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, | |||||
| float2 uv, | |||||
| const int comp = 0, | |||||
| int2 offset = int2(0)) | |||||
| { | |||||
| return tex.texture->gather(*tex.samp, uv, offset); | |||||
| } | |||||
| /* Texture write support. */ | |||||
| template<typename S, typename T, access A> | |||||
| inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, | |||||
| T _coord, | |||||
| vec<S, 4> value) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| float h = tex.texture->get_height(); | |||||
| if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) { | |||||
| tex.texture->write(value, uint2(_coord.xy)); | |||||
| } | |||||
| } | |||||
| template<typename S, typename T, access A> | |||||
| inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, | |||||
| T _coord, | |||||
| vec<S, 4> value) | |||||
| { | |||||
| float w = tex.texture->get_width(); | |||||
| float h = tex.texture->get_height(); | |||||
| float d = tex.texture->get_depth(); | |||||
| if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 && | |||||
| _coord.z < d) { | |||||
| tex.texture->write(value, uint3(_coord.xyz)); | |||||
| } | |||||
| } | |||||
| /* SSBO Vertex Fetch Mode. */ | |||||
| #ifdef MTL_SSBO_VERTEX_FETCH | |||||
| /* Enabled when geometry is passed via raw buffer bindings, rather than using | |||||
| * vertex assembly in the vertex-descriptor. | |||||
| * | |||||
| * To describe the layout of input attribute data, we will generate uniforms (defaulting to 0), | |||||
| * named as follows per unique input attribute `attr`: | |||||
| * | |||||
| * - uniform_ssbo_stride_##attr - Representing the stride between elements. | |||||
| * - uniform_ssbo_offset_##attr - Representing the base offset within the vertex. | |||||
| * - uniform_ssbo_fetchmode_##attr - Whether using per-vertex (=0) or per-instance fetch (=1). | |||||
| * - uniform_ssbo_vbo_id_##attr - Buffer binding index for the VBO with data for this attribute. | |||||
| * - uniform_ssbo_type_##attr - The type of data in the currently bound buffer. | |||||
| * | |||||
| * If uniform_ssbo_type_* does not match the desired type, it is the responsibility of the | |||||
| * shader to perform the conversion. Types should always be read as the raw attribute type and | |||||
| * then converted, e.g. if uniform_ssbo_type_* is `int` but the value should be normalized to a | |||||
| * float, the shader reads the raw int and performs the normalization itself. | |||||
| * The implementation should query the attribute type using vertex_fetch_get_attr_type(attr_name): | |||||
| * | |||||
| * float fweight = 0.0; | |||||
| * if(vertex_fetch_get_attr_type(in_weight) == GPU_SHADER_ATTR_TYPE_INT) { | |||||
| * int iweight = vertex_fetch_attribute(gl_VertexID, in_weight, int); | |||||
| * fweight = (float)iweight/(float)INT32_MAX; | |||||
| * } else { | |||||
| * fweight = vertex_fetch_attribute(gl_VertexID, in_weight, float); | |||||
| * } | |||||
| * | |||||
| * Note: These uniforms are generated as part of the same data block used for regular uniforms | |||||
| * and attribute data is written prior to each draw call, depending on the configuration of | |||||
| * the vertex descriptor for an MTLBatch or MTLImmediate call. */ | |||||
| # define PPCAT_NX(A, B) A##B | |||||
| # define PPCAT(A, B) PPCAT_NX(A, B) | |||||
| # define RESOLVE_VERTEX(v_id) \ | |||||
| ((UNIFORM_SSBO_USES_INDEXED_RENDERING_STR > 0) ? \ | |||||
| ((UNIFORM_SSBO_INDEX_MODE_U16_STR > 0) ? MTL_INDEX_DATA_U16[v_id] : \ | |||||
| MTL_INDEX_DATA_U32[v_id]) : \ | |||||
| v_id) | |||||
| # define ATTR_TYPE(attr) PPCAT(SSBO_ATTR_TYPE_, attr) | |||||
| # define vertex_fetch_attribute_raw(n, attr, type) \ | |||||
| (reinterpret_cast<constant type *>( \ | |||||
| &MTL_VERTEX_DATA[PPCAT(UNIFORM_SSBO_VBO_ID_STR, attr)] \ | |||||
| [(PPCAT(UNIFORM_SSBO_STRIDE_STR, attr) * \ | |||||
| ((PPCAT(UNIFORM_SSBO_FETCHMODE_STR, attr)) ? gl_InstanceID : n)) + \ | |||||
| PPCAT(UNIFORM_SSBO_OFFSET_STR, attr)]))[0] | |||||
| # define vertex_fetch_attribute(n, attr, type) \ | |||||
| vertex_fetch_attribute_raw(RESOLVE_VERTEX(n), attr, type) | |||||
| # define vertex_id_from_index_id(n) RESOLVE_VERTEX(n) | |||||
| # define vertex_fetch_get_input_prim_type() UNIFORM_SSBO_INPUT_PRIM_TYPE_STR | |||||
| # define vertex_fetch_get_input_vert_count() UNIFORM_SSBO_INPUT_VERT_COUNT_STR | |||||
| # define vertex_fetch_get_attr_type(attr) PPCAT(UNIFORM_SSBO_TYPE_STR, attr) | |||||
| /* Must mirror GPU_primitive.h. */ | |||||
| # define GPU_PRIM_POINTS 0 | |||||
| # define GPU_PRIM_LINES 1 | |||||
| # define GPU_PRIM_TRIS 2 | |||||
| # define GPU_PRIM_LINE_STRIP 3 | |||||
| # define GPU_PRIM_LINE_LOOP 4 | |||||
| # define GPU_PRIM_TRI_STRIP 5 | |||||
| # define GPU_PRIM_TRI_FAN 6 | |||||
| # define GPU_PRIM_LINES_ADJ 7 | |||||
| # define GPU_PRIM_TRIS_ADJ 8 | |||||
| # define GPU_PRIM_LINE_STRIP_ADJ 9 | |||||
| #endif | |||||
| /* Common Functions. */ | |||||
| #define dFdx(x) dfdx(x) | |||||
| #define dFdy(x) dfdy(x) | |||||
| #define mod(x, y) _mtlmod(x, y) | |||||
| #define discard discard_fragment() | |||||
| #define inversesqrt rsqrt | |||||
| inline float radians(float deg) | |||||
| { | |||||
| /* Constant factor: M_PI_F/180.0. */ | |||||
| return deg * 0.01745329251f; | |||||
| } | |||||
| inline float degrees(float rad) | |||||
| { | |||||
| /* Constant factor: 180.0/M_PI_F. */ | |||||
| return rad * 57.2957795131f; | |||||
| } | |||||
| #define select(A, B, C) mix(A, B, C) | |||||
| /* Type conversions and type truncations. */ | |||||
| inline float4 to_float4(float3 val) | |||||
| { | |||||
| return float4(val, 1.0); | |||||
| } | |||||
| /* Type conversions and type truncations (Utility Functions). */ | |||||
| inline float3x3 mat4_to_mat3(float4x4 matrix) | |||||
| { | |||||
| return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz); | |||||
| } | |||||
| inline int floatBitsToInt(float f) | |||||
| { | |||||
fclem: Why not implement those as inline functions like many others below? It would offer better debuggability.
| return as_type<int>(f); | |||||
fclem: Is `M_PI_F` defined by MSL? If so, it will conflict with ours in `common_math_lib.glsl`. If not, it will be missing from pyGPU shaders. Also, I think precomputing the constant factor is a better idea to avoid the precision loss of two operations.
MichaelPW: Yes, M_PI_F is declared globally. Currently it does not appear to collide, though I can replace this definition with a pre-computed constant. Do pyGPU shaders include common_math_lib as default? On this note, are there any good test-cases for pyGPU? It would be useful to have a range of extensions which use the features to test the implementation.
fclem: I would prefer a precomputed constant.
> Do pyGPU shaders include common_math_lib as default?
They don't. They would have to declare their own M_PI_F constant. Regarding test-cases, I will ask the right person and forward you the answer. But to my knowledge, there are only the official extensions that we maintain, and I would bet there are not a lot of custom shaders in them.
| } | |||||
| inline int2 floatBitsToInt(float2 f) | |||||
| { | |||||
| return as_type<int2>(f); | |||||
fclem: Why the define to a single use function?
MichaelPW: Good point, this was unnecessary, have removed.
| } | |||||
| inline int3 floatBitsToInt(float3 f) | |||||
| { | |||||
| return as_type<int3>(f); | |||||
| } | |||||
| inline int4 floatBitsToInt(float4 f) | |||||
| { | |||||
| return as_type<int4>(f); | |||||
| } | |||||
| inline uint floatBitsToUint(float f) | |||||
| { | |||||
| return as_type<uint>(f); | |||||
| } | |||||
| inline uint2 floatBitsToUint(float2 f) | |||||
| { | |||||
| return as_type<uint2>(f); | |||||
| } | |||||
| inline uint3 floatBitsToUint(float3 f) | |||||
| { | |||||
| return as_type<uint3>(f); | |||||
| } | |||||
| inline uint4 floatBitsToUint(float4 f) | |||||
| { | |||||
| return as_type<uint4>(f); | |||||
| } | |||||
| inline float intBitsToFloat(int f) | |||||
| { | |||||
| return as_type<float>(f); | |||||
| } | |||||
| inline float2 intBitsToFloat(int2 f) | |||||
| { | |||||
| return as_type<float2>(f); | |||||
| } | |||||
| inline float3 intBitsToFloat(int3 f) | |||||
| { | |||||
| return as_type<float3>(f); | |||||
| } | |||||
| inline float4 intBitsToFloat(int4 f) | |||||
| { | |||||
| return as_type<float4>(f); | |||||
| } | |||||
| /* Texture size functions. Add texture types as needed. */ | |||||
| template<typename T, access A> | |||||
| int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod) | |||||
| { | |||||
| return int(image.texture->get_width()); | |||||
| } | |||||
| template<typename T, access A> | |||||
| int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod) | |||||
| { | |||||
| return int2(image.texture->get_width(), image.texture->get_array_size()); | |||||
| } | |||||
| template<typename T, access A> | |||||
| int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod) | |||||
| { | |||||
| return int2(image.texture->get_width(lod), image.texture->get_height(lod)); | |||||
| } | |||||
| template<typename T, access A> | |||||
| int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod) | |||||
| { | |||||
| return int2(image.texture->get_width(lod), image.texture->get_height(lod)); | |||||
| } | |||||
| template<typename T, access A> | |||||
| int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod) | |||||
| { | |||||
| return int3(image.texture->get_width(lod), | |||||
| image.texture->get_height(lod), | |||||
| image.texture->get_array_size()); | |||||
| } | |||||
| template<typename T, access A> | |||||
| int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod) | |||||
| { | |||||
| return int3(image.texture->get_width(lod), | |||||
| image.texture->get_height(lod), | |||||
| image.texture->get_array_size()); | |||||
| } | |||||
| template<typename T, access A> | |||||
| int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod) | |||||
| { | |||||
| return int2(image.texture->get_width(lod), image.texture->get_height(lod)); | |||||
| } | |||||
| template<typename T, access A> | |||||
| int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod) | |||||
| { | |||||
| return int3(image.texture->get_width(lod), | |||||
| image.texture->get_height(lod), | |||||
| image.texture->get_depth(lod)); | |||||
| } | |||||
| /* Equality and comparison functions. */ | |||||
| #define lessThan(a, b) ((a) < (b)) | |||||
| #define lessThanEqual(a, b) ((a) <= (b)) | |||||
| #define greaterThan(a, b) ((a) > (b)) | |||||
| #define greaterThanEqual(a, b) ((a) >= (b)) | |||||
| #define equal(a, b) ((a) == (b)) | |||||
| #define notEqual(a, b) ((a) != (b)) | |||||
| template<typename T, int n> bool all(vec<T, n> x) | |||||
| { | |||||
| bool _all = true; | |||||
| for (int i = 0; i < n; i++) { | |||||
| _all = _all && (x[i] > 0); | |||||
| } | |||||
| return _all; | |||||
| } | |||||
| template<typename T, int n> bool any(vec<T, n> x) | |||||
| { | |||||
| bool _any = false; | |||||
| for (int i = 0; i < n; i++) { | |||||
| _any = _any || (x[i] > 0); | |||||
| } | |||||
| return _any; | |||||
| } | |||||
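A minimal sketch of how the comparison macros and these reductions compose; `any_negative` is a hypothetical helper, not part of the patch. Native comparison results (bool vectors) are handled by Metal's built-in any()/all(), while the template overloads above cover integer vectors, since GLSL bool variables are emulated as int.

  inline bool any_negative(float3 v)
  {
    /* lessThan() expands to a component-wise comparison yielding a bool3 mask. */
    return any(lessThan(v, float3(0.0)));
  }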
| /* Modulo functionality. */ | |||||
| int _mtlmod(int a, int b) | |||||
| { | |||||
| return a - b * (a / b); | |||||
| } | |||||
| template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y) | |||||
| { | |||||
| return x - y * floor(x / y); | |||||
| } | |||||
| template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y) | |||||
| { | |||||
| return x - vec<T, n>(y) * floor(x / vec<T, n>(y)); | |||||
| } | |||||
| template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y) | |||||
| { | |||||
| return vec<U, n>(x) - y * floor(vec<U, n>(x) / y); | |||||
| } | |||||
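A small worked example of the semantics above, with illustrative values: the floating-point overloads use floor-based (GLSL-style) modulo, while the int overload uses truncated division.

  /* mod(float2(-0.25, 1.25), 1.0) -> x - y * floor(x / y) = float2(0.75, 0.25)
   *   (Metal's fmod(-0.25, 1.0) would return -0.25 instead.)
   * mod(-7, 3)                    -> -7 - 3 * (-7 / 3) = -7 - 3 * (-2) = -1 */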
| /* Mathematical functions. */ | |||||
| template<typename T> T atan(T y, T x) | |||||
| { | |||||
| return atan2(y, x); | |||||
| } | |||||
| /* Matrix Inverse. */ | |||||
| float4x4 inverse(float4x4 a) | |||||
| { | |||||
| float b00 = a[0][0] * a[1][1] - a[0][1] * a[1][0]; | |||||
| float b01 = a[0][0] * a[1][2] - a[0][2] * a[1][0]; | |||||
| float b02 = a[0][0] * a[1][3] - a[0][3] * a[1][0]; | |||||
| float b03 = a[0][1] * a[1][2] - a[0][2] * a[1][1]; | |||||
| float b04 = a[0][1] * a[1][3] - a[0][3] * a[1][1]; | |||||
| float b05 = a[0][2] * a[1][3] - a[0][3] * a[1][2]; | |||||
| float b06 = a[2][0] * a[3][1] - a[2][1] * a[3][0]; | |||||
| float b07 = a[2][0] * a[3][2] - a[2][2] * a[3][0]; | |||||
| float b08 = a[2][0] * a[3][3] - a[2][3] * a[3][0]; | |||||
| float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1]; | |||||
| float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1]; | |||||
| float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2]; | |||||
fclem: Is that used by the backend internally? Because to my knowledge, this is not part of GLSL.
MichaelPW: This was thrown in in case it was needed, and also used in a few experiments, but I will remove it as I imagine it is not needed.
| float invdet = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06); | |||||
| return float4x4(a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09, | |||||
| a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09, | |||||
| a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03, | |||||
| a[2][2] * b04 - a[2][1] * b05 - a[2][3] * b03, | |||||
| a[1][2] * b08 - a[1][0] * b11 - a[1][3] * b07, | |||||
| a[0][0] * b11 - a[0][2] * b08 + a[0][3] * b07, | |||||
| a[3][2] * b02 - a[3][0] * b05 - a[3][3] * b01, | |||||
| a[2][0] * b05 - a[2][2] * b02 + a[2][3] * b01, | |||||
| a[1][0] * b10 - a[1][1] * b08 + a[1][3] * b06, | |||||
| a[0][1] * b08 - a[0][0] * b10 - a[0][3] * b06, | |||||
| a[3][0] * b04 - a[3][1] * b02 + a[3][3] * b00, | |||||
| a[2][1] * b02 - a[2][0] * b04 - a[2][3] * b00, | |||||
| a[1][1] * b07 - a[1][0] * b09 - a[1][2] * b06, | |||||
| a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06, | |||||
| a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00, | |||||
| a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00) * | |||||
| invdet; | |||||
| } | |||||
| float3x3 inverse(float3x3 m) | |||||
| { | |||||
| float invdet = 1.0 / (m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) - | |||||
| m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) + | |||||
| m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2])); | |||||
| float3x3 inverse(0); | |||||
| inverse[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]); | |||||
| inverse[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]); | |||||
| inverse[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]); | |||||
| inverse[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]); | |||||
| inverse[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]); | |||||
| inverse[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]); | |||||
| inverse[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]); | |||||
| inverse[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]); | |||||
| inverse[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]); | |||||
| inverse = inverse * invdet; | |||||
| return inverse; | |||||
| } | |||||
| /* Additional overloads for builtin functions. */ | |||||
| float distance(float x, float y) | |||||
| { | |||||
| return abs(y - x); | |||||
| } | |||||
| /* Overload for mix(A, B, float ratio). */ | |||||
| template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val) | |||||
| { | |||||
| return mix(a, b, vec<T, Size>(val)); | |||||
| } | |||||
| /* Overload for mix(A, B, bvec<N>). */ | |||||
| template<typename T, int Size> | |||||
| vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask) | |||||
| { | |||||
| vec<T, Size> result; | |||||
fclem: Why add the metal namespace here?
MichaelPW: Ah yes, this is also no longer needed. This was carry-over after previously using some functions from the Metal headers, but it is ultimately unnecessary, especially given the shader explicitly uses the metal namespace.
| for (int i = 0; i < Size; i++) { | |||||
| result[i] = mask[i] ? b[i] : a[i]; | |||||
| } | |||||
| return result; | |||||
| } | |||||
| /* Using vec<bool, S> does not appear to work, splitting cases. */ | |||||
| /* Overload for mix(A, B, bvec<N>). */ | |||||
| template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask) | |||||
| { | |||||
| vec<T, 4> result; | |||||
| for (int i = 0; i < 4; i++) { | |||||
| result[i] = mask[i] ? b[i] : a[i]; | |||||
| } | |||||
| return result; | |||||
| } | |||||
| /* Overload for mix(A, B, bvec<N>). */ | |||||
| template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask) | |||||
| { | |||||
| vec<T, 3> result; | |||||
| for (int i = 0; i < 3; i++) { | |||||
| result[i] = mask[i] ? b[i] : a[i]; | |||||
| } | |||||
| return result; | |||||
| } | |||||
| /* Overload for mix(A, B, bvec<N>). */ | |||||
| template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask) | |||||
| { | |||||
| vec<T, 2> result; | |||||
| for (int i = 0; i < 2; i++) { | |||||
| result[i] = mask[i] ? b[i] : a[i]; | |||||
| } | |||||
| return result; | |||||
| } | |||||
| /* Overload for mix(A, B, bvec<N>). */ | |||||
| template<typename T> T mix(T a, T b, MTLBOOL mask) | |||||
| { | |||||
| return (mask) ? b : a; | |||||
| } | |||||
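A short usage sketch tying the comparison macros to these overloads; `pick_positive` is a hypothetical helper. The comparison yields a native bool3, so the bvec3 overload above is selected.

  inline float3 pick_positive(float3 a, float3 b)
  {
    /* Selects b[i] where b[i] > 0, otherwise a[i], mirroring GLSL's mix(a, b, bvec3). */
    return mix(a, b, greaterThan(b, float3(0.0)));
  }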
| template<typename T, unsigned int Size> bool is_zero(vec<T, Size> a) | |||||
| { | |||||
| for (int i = 0; i < Size; i++) { | |||||
| if (a[i] != T(0)) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| /* Matrix conversion fallback. */ | |||||
| mat3 MAT3(vec3 a, vec3 b, vec3 c) | |||||
| { | |||||
| return mat3(a, b, c); | |||||
| } | |||||
| mat3 MAT3(float f) | |||||
| { | |||||
| return mat3(f); | |||||
| } | |||||
| mat3 MAT3(mat4 m) | |||||
| { | |||||
| return mat4_to_mat3(m); | |||||
| } | |||||
| No newline at end of file | |||||
fclem: So all of this is only needed because of the mat4_to_mat3 case? Maybe we can make an exception and enforce the use of mat4_to_mat3. We could even catch the error in the log parser and suggest the workaround.
MichaelPW: To simplify the code, we could remove mat4_to_mat3 and have the conversion directly in the MAT3 function. Currently, a find-and-replace expression is used to detect casts to mat3( and replace those with MAT3 so as to use these function calls, such that all cases can be handled. But yes, the functions here are only wrappers around the constructor, as find-and-replace is not advanced enough to know the input parameter type. I believe a previous patch had replaced mat3(mat4 m) cases to use mat4_to_mat3 directly, though this was removed for compatibility. It would, however, be more explicit to have this in the high-level shaders, as mat4_to_mat3 does cause a type truncation.
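To make the mechanism described above concrete, a hypothetical before/after of the find-and-replace step (ModelMatrix is an illustrative name):

  /* GLSL source:        mat3 normal_mat = mat3(ModelMatrix);
   * After replacement:  mat3 normal_mat = MAT3(ModelMatrix);
   * which overload-resolves to MAT3(mat4 m) and therefore to mat4_to_mat3(m). */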
fclem: Note: I suggest always adding a newline at the end of shader files since they all get concatenated. Sometimes you can end up with a #define on the first line of a following file, and this would break compilation because the directive would be preceded by another character.