Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/util/transform.h
| Show First 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | |||||
| * rotation (4), then translation (3), then 3x3 scale matrix (9). */ | * rotation (4), then translation (3), then 3x3 scale matrix (9). */ | ||||
/* Storage is four float4 values (16 floats) for the decomposition described in
 * the comment above. In the code visible in this file, `x` is interpolated with
 * quat_interpolate() and `y.x`, `y.y`, `y.z` are written into the `.w` slots of
 * the recomposed Transform rows. `y.w`, `z` and `w` presumably carry the 3x3
 * scale matrix -- TODO confirm against the decompose/compose code. */
| typedef struct DecomposedTransform { | typedef struct DecomposedTransform { | ||||
| float4 x, y, z, w; | float4 x, y, z, w; | ||||
| } DecomposedTransform; | } DecomposedTransform; | ||||
| /* Functions */ | /* Functions */ | ||||
| #ifdef __KERNEL_METAL__ | |||||
| /* transform_point specialized for ccl_global */ | |||||
/* Overload that reads the Transform through a ccl_global pointer; it sits inside
 * the __KERNEL_METAL__ guard and so only exists for Metal builds.
 *
 * Returns `a * b` (where `b` is `*t` reinterpreted as a float3x3) plus the
 * vector (t->x.w, t->y.w, t->z.w).
 *
 * NOTE(review): the reinterpreting cast presumably relies on float3x3 columns
 * having the same stride as the float4 rows of Transform, and on
 * `vector * matrix` yielding dot(a, column) per component so the result matches
 * the scalar path of the ccl_private overload -- confirm against the Metal
 * Shading Language specification. */
| ccl_device_inline float3 transform_point(ccl_global const Transform *t, const float3 a) | |||||
| { | |||||
| ccl_global const float3x3 &b(*(ccl_global const float3x3 *)t); | |||||
| return (a * b).xyz + make_float3(t->x.w, t->y.w, t->z.w); | |||||
| } | |||||
| #endif | |||||
/* Apply the transform `t` to the point `a`.
 *
 * The scalar fallback at the bottom spells out the result: for each of the rows
 * t->x, t->y, t->z the output component is dot(a, row.xyz) + row.w. The SSE and
 * Metal branches are alternative implementations chosen at compile time. */
| ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a) | ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a) | ||||
| { | { | ||||
| /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ | /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ | ||||
/* NOTE(review): despite the TODO above, the SSE branch below is compiled
 * whenever both __KERNEL_SSE__ and __KERNEL_SSE2__ are defined -- the TODO may
 * be stale; confirm. */
| #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) | #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) | ||||
| ssef x, y, z, w, aa; | ssef x, y, z, w, aa; | ||||
| aa = a.m128; | aa = a.m128; | ||||
/* Unaligned loads of the three Transform rows. */
| x = _mm_loadu_ps(&t->x.x); | x = _mm_loadu_ps(&t->x.x); | ||||
| y = _mm_loadu_ps(&t->y.x); | y = _mm_loadu_ps(&t->y.x); | ||||
| z = _mm_loadu_ps(&t->z.x); | z = _mm_loadu_ps(&t->z.x); | ||||
/* _mm_set_ps takes its arguments from the highest lane down, so this is the
 * implicit fourth row (0, 0, 0, 1). */
| w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); | w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); | ||||
/* After the transpose x/y/z/w hold the matrix columns; w becomes
 * (t->x.w, t->y.w, t->z.w, 1). */
| _MM_TRANSPOSE4_PS(x, y, z, w); | _MM_TRANSPOSE4_PS(x, y, z, w); | ||||
/* Accumulates a.x * col0 + a.y * col1 + a.z * col2, then adds col3.
 * Presumably shuffle<i> broadcasts lane i and madd(p, q, r) computes p * q + r;
 * both helpers are defined outside the visible code -- TODO confirm. */
| ssef tmp = shuffle<0>(aa) * x; | ssef tmp = shuffle<0>(aa) * x; | ||||
| tmp = madd(shuffle<1>(aa), y, tmp); | tmp = madd(shuffle<1>(aa), y, tmp); | ||||
| tmp = madd(shuffle<2>(aa), z, tmp); | tmp = madd(shuffle<2>(aa), z, tmp); | ||||
| tmp += w; | tmp += w; | ||||
| return float3(tmp.m128); | return float3(tmp.m128); | ||||
| #elif defined(__KERNEL_METAL__) | |||||
/* Reinterprets *t as a float3x3 and adds the .w components afterwards.
 * NOTE(review): presumably relies on float3x3 sharing the memory layout of the
 * three float4 rows -- confirm. */
| ccl_private const float3x3 &b(*(ccl_private const float3x3 *)t); | |||||
| return (a * b).xyz + make_float3(t->x.w, t->y.w, t->z.w); | |||||
| #else | #else | ||||
/* Portable scalar path: one dot product plus the .w term per output component. */
| float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, | float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, | ||||
| a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, | a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, | ||||
| a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); | a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); | ||||
| return c; | return c; | ||||
| #endif | #endif | ||||
| } | } | ||||
| Show All 10 Lines | #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) | ||||
| _MM_TRANSPOSE4_PS(x, y, z, w); | _MM_TRANSPOSE4_PS(x, y, z, w); | ||||
| ssef tmp = shuffle<0>(aa) * x; | ssef tmp = shuffle<0>(aa) * x; | ||||
| tmp = madd(shuffle<1>(aa), y, tmp); | tmp = madd(shuffle<1>(aa), y, tmp); | ||||
| tmp = madd(shuffle<2>(aa), z, tmp); | tmp = madd(shuffle<2>(aa), z, tmp); | ||||
| return float3(tmp.m128); | return float3(tmp.m128); | ||||
| #elif defined(__KERNEL_METAL__) | |||||
| ccl_private const float3x3 &b(*(ccl_private const float3x3 *)t); | |||||
| return (a * b).xyz; | |||||
| #else | #else | ||||
| float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, | float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, | ||||
| a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, | a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, | ||||
| a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); | a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); | ||||
| return c; | return c; | ||||
| #endif | #endif | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 335 Lines • ▼ Show 20 Lines | tfm->x = make_float4( | ||||
| dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); | dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); | ||||
| tfm->y = make_float4( | tfm->y = make_float4( | ||||
| dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); | dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); | ||||
| tfm->z = make_float4( | tfm->z = make_float4( | ||||
| dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); | dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); | ||||
| } | } | ||||
| /* Interpolate from array of decomposed transforms. */ | /* Interpolate from array of decomposed transforms. */ | ||||
| ccl_device void transform_motion_array_interpolate(Transform *tfm, | ccl_device void transform_motion_array_interpolate(ccl_private Transform *tfm, | ||||
| const DecomposedTransform *motion, | ccl_global const DecomposedTransform *motion, | ||||
| uint numsteps, | uint numsteps, | ||||
| float time) | float time) | ||||
| { | { | ||||
| /* Figure out which steps we need to interpolate. */ | /* Figure out which steps we need to interpolate. */ | ||||
| int maxstep = numsteps - 1; | int maxstep = numsteps - 1; | ||||
| int step = min((int)(time * maxstep), maxstep - 1); | int step = min((int)(time * maxstep), maxstep - 1); | ||||
| float t = time * maxstep - step; | float t = time * maxstep - step; | ||||
| const DecomposedTransform *a = motion + step; | ccl_global const DecomposedTransform *a = motion + step; | ||||
| const DecomposedTransform *b = motion + step + 1; | ccl_global const DecomposedTransform *b = motion + step + 1; | ||||
| /* Interpolate rotation, translation and scale. */ | /* Interpolate rotation, translation and scale. */ | ||||
| DecomposedTransform decomp; | DecomposedTransform decomp; | ||||
| decomp.x = quat_interpolate(a->x, b->x, t); | decomp.x = quat_interpolate(a->x, b->x, t); | ||||
| decomp.y = (1.0f - t) * a->y + t * b->y; | decomp.y = (1.0f - t) * a->y + t * b->y; | ||||
| decomp.z = (1.0f - t) * a->z + t * b->z; | decomp.z = (1.0f - t) * a->z + t * b->z; | ||||
| decomp.w = (1.0f - t) * a->w + t * b->w; | decomp.w = (1.0f - t) * a->w + t * b->w; | ||||
| Show All 40 Lines | |||||