Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/util/util_math_matrix.h
| Show All 29 Lines | |||||
| #else | #else | ||||
| # define MATS(A, n, r, c, s) MAT(A, n, r, c) | # define MATS(A, n, r, c, s) MAT(A, n, r, c) | ||||
| # define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)] | # define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)] | ||||
| # define VECS(V, i, s) V[i] | # define VECS(V, i, s) V[i] | ||||
| #endif | #endif | ||||
| /* Zeroing helpers. */ | /* Zeroing helpers. */ | ||||
| ccl_device_inline void math_vector_zero(float *v, int n) | ccl_device_inline void math_vector_zero(ccl_private float *v, int n) | ||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| v[i] = 0.0f; | v[i] = 0.0f; | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_matrix_zero(float *A, int n) | ccl_device_inline void math_matrix_zero(ccl_private float *A, int n) | ||||
| { | { | ||||
| for (int row = 0; row < n; row++) { | for (int row = 0; row < n; row++) { | ||||
| for (int col = 0; col <= row; col++) { | for (int col = 0; col <= row; col++) { | ||||
| MAT(A, n, row, col) = 0.0f; | MAT(A, n, row, col) = 0.0f; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| /* Elementary vector operations. */ | /* Elementary vector operations. */ | ||||
| ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n) | ccl_device_inline void math_vector_add(ccl_private float *a, | ||||
| ccl_private const float *ccl_restrict b, | |||||
| int n) | |||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| a[i] += b[i]; | a[i] += b[i]; | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n) | ccl_device_inline void math_vector_mul(ccl_private float *a, | ||||
| ccl_private const float *ccl_restrict b, | |||||
| int n) | |||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| a[i] *= b[i]; | a[i] *= b[i]; | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_vector_mul_strided(ccl_global float *a, | ccl_device_inline void math_vector_mul_strided(ccl_global float *a, | ||||
| const float *ccl_restrict b, | ccl_private const float *ccl_restrict b, | ||||
| int astride, | int astride, | ||||
| int n) | int n) | ||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| a[i * astride] *= b[i]; | a[i * astride] *= b[i]; | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_vector_scale(float *a, float b, int n) | ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n) | ||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| a[i] *= b; | a[i] *= b; | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n) | ccl_device_inline void math_vector_max(ccl_private float *a, | ||||
| ccl_private const float *ccl_restrict b, | |||||
| int n) | |||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| a[i] = max(a[i], b[i]); | a[i] = max(a[i], b[i]); | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_vec3_add(float3 *v, int n, float *x, float3 w) | ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w) | ||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| v[i] += w * x[i]; | v[i] += w * x[i]; | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_vec3_add_strided( | ccl_device_inline void math_vec3_add_strided( | ||||
| ccl_global float3 *v, int n, float *x, float3 w, int stride) | ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride) | ||||
| { | { | ||||
| for (int i = 0; i < n; i++) { | for (int i = 0; i < n; i++) { | ||||
| ccl_global float *elem = (ccl_global float *)(v + i * stride); | ccl_global float *elem = (ccl_global float *)(v + i * stride); | ||||
| atomic_add_and_fetch_float(elem + 0, w.x * x[i]); | atomic_add_and_fetch_float(elem + 0, w.x * x[i]); | ||||
| atomic_add_and_fetch_float(elem + 1, w.y * x[i]); | atomic_add_and_fetch_float(elem + 1, w.y * x[i]); | ||||
| atomic_add_and_fetch_float(elem + 2, w.z * x[i]); | atomic_add_and_fetch_float(elem + 2, w.z * x[i]); | ||||
| } | } | ||||
| } | } | ||||
| Show All 9 Lines | |||||
| { | { | ||||
| for (int row = 0; row < n; row++) { | for (int row = 0; row < n; row++) { | ||||
| MATHS(A, row, row, stride) += val; | MATHS(A, row, row, stride) += val; | ||||
| } | } | ||||
| } | } | ||||
| /* Add Gramian matrix of v to A. | /* Add Gramian matrix of v to A. | ||||
| * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ | * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ | ||||
| ccl_device_inline void math_matrix_add_gramian(float *A, | ccl_device_inline void math_matrix_add_gramian(ccl_private float *A, | ||||
| int n, | int n, | ||||
| const float *ccl_restrict v, | ccl_private const float *ccl_restrict v, | ||||
| float weight) | float weight) | ||||
| { | { | ||||
| for (int row = 0; row < n; row++) { | for (int row = 0; row < n; row++) { | ||||
| for (int col = 0; col <= row; col++) { | for (int col = 0; col <= row; col++) { | ||||
| MAT(A, n, row, col) += v[row] * v[col] * weight; | MAT(A, n, row, col) += v[row] * v[col] * weight; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| /* Add Gramian matrix of v to A. | /* Add Gramian matrix of v to A. | ||||
| * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ | * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ | ||||
| ccl_device_inline void math_trimatrix_add_gramian_strided( | ccl_device_inline void math_trimatrix_add_gramian_strided( | ||||
| ccl_global float *A, int n, const float *ccl_restrict v, float weight, int stride) | ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride) | ||||
| { | { | ||||
| for (int row = 0; row < n; row++) { | for (int row = 0; row < n; row++) { | ||||
| for (int col = 0; col <= row; col++) { | for (int col = 0; col <= row; col++) { | ||||
| atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight); | atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, | ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, | ||||
| int n, | int n, | ||||
| const float *ccl_restrict v, | ccl_private const float *ccl_restrict v, | ||||
| float weight) | float weight) | ||||
| { | { | ||||
| for (int row = 0; row < n; row++) { | for (int row = 0; row < n; row++) { | ||||
| for (int col = 0; col <= row; col++) { | for (int col = 0; col <= row; col++) { | ||||
| MATHS(A, row, col, 1) += v[row] * v[col] * weight; | MATHS(A, row, col, 1) += v[row] * v[col] * weight; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | |||||
| /* Perform the Jacobi Eigenvalue Method on matrix A. | /* Perform the Jacobi Eigenvalue Method on matrix A. | ||||
| * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever | * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever | ||||
| * accessed. The algorithm overwrites the contents of A. | * accessed. The algorithm overwrites the contents of A. | ||||
| * | * | ||||
| * After returning, A will be overwritten with D, which is (almost) diagonal, | * After returning, A will be overwritten with D, which is (almost) diagonal, | ||||
| * and V will contain the eigenvectors of the original A in its rows (!), | * and V will contain the eigenvectors of the original A in its rows (!), | ||||
| * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A. | * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A. | ||||
| */ | */ | ||||
| ccl_device void math_matrix_jacobi_eigendecomposition(float *A, | ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A, | ||||
| ccl_global float *V, | ccl_global float *V, | ||||
| int n, | int n, | ||||
| int v_stride) | int v_stride) | ||||
| { | { | ||||
| const float singular_epsilon = 1e-9f; | const float singular_epsilon = 1e-9f; | ||||
| for (int row = 0; row < n; row++) { | for (int row = 0; row < n; row++) { | ||||
| for (int col = 0; col < n; col++) { | for (int col = 0; col < n; col++) { | ||||
| ▲ Show 20 Lines • Show All 193 Lines • Show Last 20 Lines | |||||