Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/bvh/qbvh_nodes.h
| Show First 20 Lines • Show All 130 Lines • ▼ Show 20 Lines | #else | ||||
| const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); | const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); | ||||
| const sseb vmask = tnear <= tfar; | const sseb vmask = tnear <= tfar; | ||||
| int mask = (int)movemask(vmask); | int mask = (int)movemask(vmask); | ||||
| #endif | #endif | ||||
| *dist = tnear; | *dist = tnear; | ||||
| return mask; | return mask; | ||||
| } | } | ||||
| ccl_device_inline int qbvh_aligned_node_intersect_robust( | |||||
| KernelGlobals *ccl_restrict kg, | |||||
| const ssef& isect_near, | |||||
| const ssef& isect_far, | |||||
| #ifdef __KERNEL_AVX2__ | |||||
| const sse3f& P_idir, | |||||
| #else | |||||
| const sse3f& P, | |||||
| #endif | |||||
| const sse3f& idir, | |||||
| const int near_x, | |||||
| const int near_y, | |||||
| const int near_z, | |||||
| const int far_x, | |||||
| const int far_y, | |||||
| const int far_z, | |||||
| const int node_addr, | |||||
| const float difl, | |||||
| ssef *ccl_restrict dist) | |||||
| { | |||||
| const int offset = node_addr + 1; | |||||
| #ifdef __KERNEL_AVX2__ | |||||
| const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x); | |||||
| const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y); | |||||
| const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z); | |||||
| const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x); | |||||
| const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y); | |||||
| const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z); | |||||
| #else | |||||
| const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x; | |||||
| const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y; | |||||
| const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z; | |||||
| const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x; | |||||
| const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y; | |||||
| const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z; | |||||
| #endif | |||||
| const float round_down = 1.0f - difl; | |||||
| const float round_up = 1.0f + difl; | |||||
| const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); | |||||
| const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); | |||||
| const sseb vmask = round_down*tnear <= round_up*tfar; | |||||
| *dist = tnear; | |||||
| return (int)movemask(vmask); | |||||
| } | |||||
| /* Unaligned nodes intersection */ | /* Unaligned nodes intersection */ | ||||
| ccl_device_inline int qbvh_unaligned_node_intersect( | ccl_device_inline int qbvh_unaligned_node_intersect( | ||||
| KernelGlobals *ccl_restrict kg, | KernelGlobals *ccl_restrict kg, | ||||
| const ssef& isect_near, | const ssef& isect_near, | ||||
| const ssef& isect_far, | const ssef& isect_far, | ||||
| #ifdef __KERNEL_AVX2__ | #ifdef __KERNEL_AVX2__ | ||||
| const sse3f& org_idir, | const sse3f& org_idir, | ||||
| ▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | #else | ||||
| const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); | const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); | ||||
| const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); | const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); | ||||
| const sseb vmask = tnear <= tfar; | const sseb vmask = tnear <= tfar; | ||||
| *dist = tnear; | *dist = tnear; | ||||
| return movemask(vmask); | return movemask(vmask); | ||||
| #endif | #endif | ||||
| } | } | ||||
| ccl_device_inline int qbvh_unaligned_node_intersect_robust( | |||||
| KernelGlobals *ccl_restrict kg, | |||||
| const ssef& isect_near, | |||||
| const ssef& isect_far, | |||||
| #ifdef __KERNEL_AVX2__ | |||||
| const sse3f& P_idir, | |||||
| #endif | |||||
| const sse3f& P, | |||||
| const sse3f& dir, | |||||
| const sse3f& idir, | |||||
| const int near_x, | |||||
| const int near_y, | |||||
| const int near_z, | |||||
| const int far_x, | |||||
| const int far_y, | |||||
| const int far_z, | |||||
| const int node_addr, | |||||
| const float difl, | |||||
| ssef *ccl_restrict dist) | |||||
| { | |||||
| const int offset = node_addr; | |||||
| const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); | |||||
| const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2); | |||||
| const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3); | |||||
| const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4); | |||||
| const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5); | |||||
| const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6); | |||||
| const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); | |||||
| const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8); | |||||
| const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9); | |||||
| const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); | |||||
| const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11); | |||||
| const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12); | |||||
| const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, | |||||
| aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, | |||||
| aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; | |||||
| const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x, | |||||
| aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y, | |||||
| aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z; | |||||
| const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); | |||||
| const ssef nrdir_x = neg_one / aligned_dir_x, | |||||
| nrdir_y = neg_one / aligned_dir_y, | |||||
| nrdir_z = neg_one / aligned_dir_z; | |||||
| const ssef tlower_x = aligned_P_x * nrdir_x, | |||||
| tlower_y = aligned_P_y * nrdir_y, | |||||
| tlower_z = aligned_P_z * nrdir_z; | |||||
| const ssef tupper_x = tlower_x - nrdir_x, | |||||
| tupper_y = tlower_y - nrdir_y, | |||||
| tupper_z = tlower_z - nrdir_z; | |||||
| const float round_down = 1.0f - difl; | |||||
| const float round_up = 1.0f + difl; | |||||
| #ifdef __KERNEL_SSE41__ | |||||
| const ssef tnear_x = mini(tlower_x, tupper_x); | |||||
| const ssef tnear_y = mini(tlower_y, tupper_y); | |||||
| const ssef tnear_z = mini(tlower_z, tupper_z); | |||||
| const ssef tfar_x = maxi(tlower_x, tupper_x); | |||||
| const ssef tfar_y = maxi(tlower_y, tupper_y); | |||||
| const ssef tfar_z = maxi(tlower_z, tupper_z); | |||||
| #else | |||||
| const ssef tnear_x = min(tlower_x, tupper_x); | |||||
| const ssef tnear_y = min(tlower_y, tupper_y); | |||||
| const ssef tnear_z = min(tlower_z, tupper_z); | |||||
| const ssef tfar_x = max(tlower_x, tupper_x); | |||||
| const ssef tfar_y = max(tlower_y, tupper_y); | |||||
| const ssef tfar_z = max(tlower_z, tupper_z); | |||||
| #endif | |||||
| const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); | |||||
| const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); | |||||
| const sseb vmask = round_down*tnear <= round_up*tfar; | |||||
| *dist = tnear; | |||||
| return movemask(vmask); | |||||
| } | |||||
| /* Intersectors wrappers. | /* Intersectors wrappers. | ||||
| * | * | ||||
| * They'll check node type and call appropriate intersection code. | * They'll check node type and call appropriate intersection code. | ||||
| */ | */ | ||||
| ccl_device_inline int qbvh_node_intersect( | ccl_device_inline int qbvh_node_intersect( | ||||
| KernelGlobals *ccl_restrict kg, | KernelGlobals *ccl_restrict kg, | ||||
| const ssef& isect_near, | const ssef& isect_near, | ||||
| ▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | |||||
| #endif | #endif | ||||
| idir, | idir, | ||||
| near_x, near_y, near_z, | near_x, near_y, near_z, | ||||
| far_x, far_y, far_z, | far_x, far_y, far_z, | ||||
| node_addr, | node_addr, | ||||
| dist); | dist); | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_inline int qbvh_node_intersect_robust( | |||||
| KernelGlobals *ccl_restrict kg, | |||||
| const ssef& isect_near, | |||||
| const ssef& isect_far, | |||||
| #ifdef __KERNEL_AVX2__ | |||||
| const sse3f& P_idir, | |||||
| #endif | |||||
| const sse3f& P, | |||||
| const sse3f& dir, | |||||
| const sse3f& idir, | |||||
| const int near_x, | |||||
| const int near_y, | |||||
| const int near_z, | |||||
| const int far_x, | |||||
| const int far_y, | |||||
| const int far_z, | |||||
| const int node_addr, | |||||
| const float difl, | |||||
| ssef *ccl_restrict dist) | |||||
| { | |||||
| const int offset = node_addr; | |||||
| const float4 node = kernel_tex_fetch(__bvh_nodes, offset); | |||||
| if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { | |||||
| return qbvh_unaligned_node_intersect_robust(kg, | |||||
| isect_near, | |||||
| isect_far, | |||||
| #ifdef __KERNEL_AVX2__ | |||||
| P_idir, | |||||
| #endif | |||||
| P, | |||||
| dir, | |||||
| idir, | |||||
| near_x, near_y, near_z, | |||||
| far_x, far_y, far_z, | |||||
| node_addr, | |||||
| difl, | |||||
| dist); | |||||
| } | |||||
| else { | |||||
| return qbvh_aligned_node_intersect_robust(kg, | |||||
| isect_near, | |||||
| isect_far, | |||||
| #ifdef __KERNEL_AVX2__ | |||||
| P_idir, | |||||
| #else | |||||
| P, | |||||
| #endif | |||||
| idir, | |||||
| near_x, near_y, near_z, | |||||
| far_x, far_y, far_z, | |||||
| node_addr, | |||||
| difl, | |||||
| dist); | |||||
| } | |||||
| } | |||||