Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/bvh/bvh_nodes.h
| Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | #ifdef __VISIBILITY_FLAG__ | ||||
| return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | ||||
| (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | ||||
| #else | #else | ||||
| return ((c0max >= c0min)? 1: 0) | | return ((c0max >= c0min)? 1: 0) | | ||||
| ((c1max >= c1min)? 2: 0); | ((c1max >= c1min)? 2: 0); | ||||
| #endif | #endif | ||||
| } | } | ||||
| ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, | |||||
| const float3 P, | |||||
| const float3 idir, | |||||
| const float t, | |||||
| const float difl, | |||||
| const float extmax, | |||||
| const int node_addr, | |||||
| const uint visibility, | |||||
| float dist[2]) | |||||
| { | |||||
| /* fetch node data */ | |||||
| float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); | |||||
| float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1); | |||||
| float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2); | |||||
| float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3); | |||||
| /* intersect ray against child nodes */ | |||||
| float c0lox = (node0.x - P.x) * idir.x; | |||||
| float c0hix = (node0.z - P.x) * idir.x; | |||||
| float c0loy = (node1.x - P.y) * idir.y; | |||||
| float c0hiy = (node1.z - P.y) * idir.y; | |||||
| float c0loz = (node2.x - P.z) * idir.z; | |||||
| float c0hiz = (node2.z - P.z) * idir.z; | |||||
| float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); | |||||
| float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); | |||||
| float c1lox = (node0.y - P.x) * idir.x; | |||||
| float c1hix = (node0.w - P.x) * idir.x; | |||||
| float c1loy = (node1.y - P.y) * idir.y; | |||||
| float c1hiy = (node1.w - P.y) * idir.y; | |||||
| float c1loz = (node2.y - P.z) * idir.z; | |||||
| float c1hiz = (node2.w - P.z) * idir.z; | |||||
| float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); | |||||
| float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); | |||||
| if(difl != 0.0f) { | |||||
| float hdiff = 1.0f + difl; | |||||
| float ldiff = 1.0f - difl; | |||||
| if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) { | |||||
| c0min = max(ldiff * c0min, c0min - extmax); | |||||
| c0max = min(hdiff * c0max, c0max + extmax); | |||||
| } | |||||
| if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) { | |||||
| c1min = max(ldiff * c1min, c1min - extmax); | |||||
| c1max = min(hdiff * c1max, c1max + extmax); | |||||
| } | |||||
| } | |||||
| dist[0] = c0min; | |||||
| dist[1] = c1min; | |||||
| #ifdef __VISIBILITY_FLAG__ | |||||
| /* this visibility test gives a 5% performance hit, how to solve? */ | |||||
| return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | |||||
| (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | |||||
| #else | |||||
| return ((c0max >= c0min)? 1: 0) | | |||||
| ((c1max >= c1min)? 2: 0); | |||||
| #endif | |||||
| } | |||||
| ccl_device_forceinline bool bvh_unaligned_node_intersect_child( | ccl_device_forceinline bool bvh_unaligned_node_intersect_child( | ||||
| KernelGlobals *kg, | KernelGlobals *kg, | ||||
| const float3 P, | const float3 P, | ||||
| const float3 dir, | const float3 dir, | ||||
| const float t, | const float t, | ||||
| int node_addr, | int node_addr, | ||||
| int child, | int child, | ||||
| float dist[2]) | float dist[2]) | ||||
| Show All 11 Lines | ccl_device_forceinline bool bvh_unaligned_node_intersect_child( | ||||
| const float far_y = max(lower_xyz.y, upper_xyz.y); | const float far_y = max(lower_xyz.y, upper_xyz.y); | ||||
| const float far_z = max(lower_xyz.z, upper_xyz.z); | const float far_z = max(lower_xyz.z, upper_xyz.z); | ||||
| const float tnear = max4(0.0f, near_x, near_y, near_z); | const float tnear = max4(0.0f, near_x, near_y, near_z); | ||||
| const float tfar = min4(t, far_x, far_y, far_z); | const float tfar = min4(t, far_x, far_y, far_z); | ||||
| *dist = tnear; | *dist = tnear; | ||||
| return tnear <= tfar; | return tnear <= tfar; | ||||
| } | } | ||||
| ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust( | |||||
| KernelGlobals *kg, | |||||
| const float3 P, | |||||
| const float3 dir, | |||||
| const float t, | |||||
| const float difl, | |||||
| int node_addr, | |||||
| int child, | |||||
| float dist[2]) | |||||
| { | |||||
| Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); | |||||
| float3 aligned_dir = transform_direction(&space, dir); | |||||
| float3 aligned_P = transform_point(&space, P); | |||||
| float3 nrdir = -bvh_inverse_direction(aligned_dir); | |||||
| float3 tLowerXYZ = aligned_P * nrdir; | |||||
| float3 tUpperXYZ = tLowerXYZ - nrdir; | |||||
| const float near_x = min(tLowerXYZ.x, tUpperXYZ.x); | |||||
| const float near_y = min(tLowerXYZ.y, tUpperXYZ.y); | |||||
| const float near_z = min(tLowerXYZ.z, tUpperXYZ.z); | |||||
| const float far_x = max(tLowerXYZ.x, tUpperXYZ.x); | |||||
| const float far_y = max(tLowerXYZ.y, tUpperXYZ.y); | |||||
| const float far_z = max(tLowerXYZ.z, tUpperXYZ.z); | |||||
| const float tnear = max4(0.0f, near_x, near_y, near_z); | |||||
| const float tfar = min4(t, far_x, far_y, far_z); | |||||
| *dist = tnear; | |||||
| if(difl != 0.0f) { | |||||
| /* TODO(sergey): Same as for QBVH, needs a proper use. */ | |||||
| const float round_down = 1.0f - difl; | |||||
| const float round_up = 1.0f + difl; | |||||
| return round_down*tnear <= round_up*tfar; | |||||
| } | |||||
| else { | |||||
| return tnear <= tfar; | |||||
| } | |||||
| } | |||||
| ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, | ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, | ||||
| const float3 P, | const float3 P, | ||||
| const float3 dir, | const float3 dir, | ||||
| const float3 idir, | const float3 idir, | ||||
| const float t, | const float t, | ||||
| const int node_addr, | const int node_addr, | ||||
| const uint visibility, | const uint visibility, | ||||
| float dist[2]) | float dist[2]) | ||||
| Show All 14 Lines | |||||
| #endif | #endif | ||||
| { | { | ||||
| mask |= 2; | mask |= 2; | ||||
| } | } | ||||
| } | } | ||||
| return mask; | return mask; | ||||
| } | } | ||||
| ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, | |||||
| const float3 P, | |||||
| const float3 dir, | |||||
| const float3 idir, | |||||
| const float t, | |||||
| const float difl, | |||||
| const float extmax, | |||||
| const int node_addr, | |||||
| const uint visibility, | |||||
| float dist[2]) | |||||
| { | |||||
| int mask = 0; | |||||
| float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); | |||||
| if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) { | |||||
| #ifdef __VISIBILITY_FLAG__ | |||||
| if((__float_as_uint(cnodes.x) & visibility)) | |||||
| #endif | |||||
| { | |||||
| mask |= 1; | |||||
| } | |||||
| } | |||||
| if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) { | |||||
| #ifdef __VISIBILITY_FLAG__ | |||||
| if((__float_as_uint(cnodes.y) & visibility)) | |||||
| #endif | |||||
| { | |||||
| mask |= 2; | |||||
| } | |||||
| } | |||||
| return mask; | |||||
| } | |||||
| ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, | ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, | ||||
| const float3 P, | const float3 P, | ||||
| const float3 dir, | const float3 dir, | ||||
| const float3 idir, | const float3 idir, | ||||
| const float t, | const float t, | ||||
| const int node_addr, | const int node_addr, | ||||
| const uint visibility, | const uint visibility, | ||||
| float dist[2]) | float dist[2]) | ||||
| Show All 15 Lines | return bvh_aligned_node_intersect(kg, | ||||
| idir, | idir, | ||||
| t, | t, | ||||
| node_addr, | node_addr, | ||||
| visibility, | visibility, | ||||
| dist); | dist); | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg, | |||||
| const float3 P, | |||||
| const float3 dir, | |||||
| const float3 idir, | |||||
| const float t, | |||||
| const float difl, | |||||
| const float extmax, | |||||
| const int node_addr, | |||||
| const uint visibility, | |||||
| float dist[2]) | |||||
| { | |||||
| float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); | |||||
| if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { | |||||
| return bvh_unaligned_node_intersect_robust(kg, | |||||
| P, | |||||
| dir, | |||||
| idir, | |||||
| t, | |||||
| difl, | |||||
| extmax, | |||||
| node_addr, | |||||
| visibility, | |||||
| dist); | |||||
| } | |||||
| else { | |||||
| return bvh_aligned_node_intersect_robust(kg, | |||||
| P, | |||||
| idir, | |||||
| t, | |||||
| difl, | |||||
| extmax, | |||||
| node_addr, | |||||
| visibility, | |||||
| dist); | |||||
| } | |||||
| } | |||||
| #else /* !defined(__KERNEL_SSE2__) */ | #else /* !defined(__KERNEL_SSE2__) */ | ||||
| int ccl_device_forceinline bvh_aligned_node_intersect( | int ccl_device_forceinline bvh_aligned_node_intersect( | ||||
| KernelGlobals *kg, | KernelGlobals *kg, | ||||
| const float3& P, | const float3& P, | ||||
| const float3& dir, | const float3& dir, | ||||
| const ssef& tsplat, | const ssef& tsplat, | ||||
| const ssef Psplat[3], | const ssef Psplat[3], | ||||
| Show All 30 Lines | # ifdef __VISIBILITY_FLAG__ | ||||
| int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | ||||
| (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | ||||
| return cmask; | return cmask; | ||||
| # else | # else | ||||
| return mask & 3; | return mask & 3; | ||||
| # endif | # endif | ||||
| } | } | ||||
| ccl_device_forceinline int bvh_aligned_node_intersect_robust( | |||||
| KernelGlobals *kg, | |||||
| const float3& P, | |||||
| const float3& dir, | |||||
| const ssef& tsplat, | |||||
| const ssef Psplat[3], | |||||
| const ssef idirsplat[3], | |||||
| const shuffle_swap_t shufflexyz[3], | |||||
| const float difl, | |||||
| const float extmax, | |||||
| const int nodeAddr, | |||||
| const uint visibility, | |||||
| float dist[2]) | |||||
| { | |||||
| /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ | |||||
| const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); | |||||
| /* fetch node data */ | |||||
| const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr; | |||||
| /* intersect ray against child nodes */ | |||||
| const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; | |||||
| const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; | |||||
| const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; | |||||
| /* calculate { c0min, c1min, -c0max, -c1max} */ | |||||
| ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); | |||||
| const ssef tminmax = minmax ^ pn; | |||||
| if(difl != 0.0f) { | |||||
| float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); | |||||
| float4 *tminmaxview = (float4*)&tminmax; | |||||
| float& c0min = tminmaxview->x, &c1min = tminmaxview->y; | |||||
| float& c0max = tminmaxview->z, &c1max = tminmaxview->w; | |||||
| float hdiff = 1.0f + difl; | |||||
| float ldiff = 1.0f - difl; | |||||
| if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) { | |||||
| c0min = max(ldiff * c0min, c0min - extmax); | |||||
| c0max = min(hdiff * c0max, c0max + extmax); | |||||
| } | |||||
| if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) { | |||||
| c1min = max(ldiff * c1min, c1min - extmax); | |||||
| c1max = min(hdiff * c1max, c1max + extmax); | |||||
| } | |||||
| } | |||||
| const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); | |||||
| dist[0] = tminmax[0]; | |||||
| dist[1] = tminmax[1]; | |||||
| int mask = movemask(lrhit); | |||||
| # ifdef __VISIBILITY_FLAG__ | |||||
| /* this visibility test gives a 5% performance hit, how to solve? */ | |||||
| float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); | |||||
| int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | |||||
| (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | |||||
| return cmask; | |||||
| # else | |||||
| return mask & 3; | |||||
| # endif | |||||
| } | |||||
| ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, | ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, | ||||
| const float3 P, | const float3 P, | ||||
| const float3 dir, | const float3 dir, | ||||
| const ssef& isect_near, | const ssef& isect_near, | ||||
| const ssef& isect_far, | const ssef& isect_far, | ||||
| const int node_addr, | const int node_addr, | ||||
| const uint visibility, | const uint visibility, | ||||
| float dist[2]) | float dist[2]) | ||||
| ▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | # ifdef __VISIBILITY_FLAG__ | ||||
| int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | ||||
| (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | ||||
| return cmask; | return cmask; | ||||
| # else | # else | ||||
| return mask & 3; | return mask & 3; | ||||
| # endif | # endif | ||||
| } | } | ||||
| ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, | |||||
| const float3 P, | |||||
| const float3 dir, | |||||
| const ssef& isect_near, | |||||
| const ssef& isect_far, | |||||
| const float difl, | |||||
| const int node_addr, | |||||
| const uint visibility, | |||||
| float dist[2]) | |||||
| { | |||||
| Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0); | |||||
| Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1); | |||||
| float3 aligned_dir0 = transform_direction(&space0, dir), | |||||
| aligned_dir1 = transform_direction(&space1, dir); | |||||
| float3 aligned_P0 = transform_point(&space0, P), | |||||
| aligned_P1 = transform_point(&space1, P); | |||||
| float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), | |||||
| nrdir1 = -bvh_inverse_direction(aligned_dir1); | |||||
| ssef lower_x = ssef(aligned_P0.x * nrdir0.x, | |||||
| aligned_P1.x * nrdir1.x, | |||||
| 0.0f, 0.0f), | |||||
| lower_y = ssef(aligned_P0.y * nrdir0.y, | |||||
| aligned_P1.y * nrdir1.y, | |||||
| 0.0f, | |||||
| 0.0f), | |||||
| lower_z = ssef(aligned_P0.z * nrdir0.z, | |||||
| aligned_P1.z * nrdir1.z, | |||||
| 0.0f, | |||||
| 0.0f); | |||||
| ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), | |||||
| upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), | |||||
| upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); | |||||
| ssef tnear_x = min(lower_x, upper_x); | |||||
| ssef tnear_y = min(lower_y, upper_y); | |||||
| ssef tnear_z = min(lower_z, upper_z); | |||||
| ssef tfar_x = max(lower_x, upper_x); | |||||
| ssef tfar_y = max(lower_y, upper_y); | |||||
| ssef tfar_z = max(lower_z, upper_z); | |||||
| const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); | |||||
| const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); | |||||
| sseb vmask; | |||||
| if(difl != 0.0f) { | |||||
| const float round_down = 1.0f - difl; | |||||
| const float round_up = 1.0f + difl; | |||||
| vmask = round_down*tnear <= round_up*tfar; | |||||
| } | |||||
| else { | |||||
| vmask = tnear <= tfar; | |||||
| } | |||||
| dist[0] = tnear.f[0]; | |||||
| dist[1] = tnear.f[1]; | |||||
| int mask = (int)movemask(vmask); | |||||
| # ifdef __VISIBILITY_FLAG__ | |||||
| /* this visibility test gives a 5% performance hit, how to solve? */ | |||||
| float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); | |||||
| int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | | |||||
| (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); | |||||
| return cmask; | |||||
| # else | |||||
| return mask & 3; | |||||
| # endif | |||||
| } | |||||
| ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, | ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, | ||||
| const float3& P, | const float3& P, | ||||
| const float3& dir, | const float3& dir, | ||||
| const ssef& isect_near, | const ssef& isect_near, | ||||
| const ssef& isect_far, | const ssef& isect_far, | ||||
| const ssef& tsplat, | const ssef& tsplat, | ||||
| const ssef Psplat[3], | const ssef Psplat[3], | ||||
| const ssef idirsplat[3], | const ssef idirsplat[3], | ||||
| Show All 21 Lines | return bvh_aligned_node_intersect(kg, | ||||
| Psplat, | Psplat, | ||||
| idirsplat, | idirsplat, | ||||
| shufflexyz, | shufflexyz, | ||||
| node_addr, | node_addr, | ||||
| visibility, | visibility, | ||||
| dist); | dist); | ||||
| } | } | ||||
| } | } | ||||
| ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg, | |||||
| const float3& P, | |||||
| const float3& dir, | |||||
| const ssef& isect_near, | |||||
| const ssef& isect_far, | |||||
| const ssef& tsplat, | |||||
| const ssef Psplat[3], | |||||
| const ssef idirsplat[3], | |||||
| const shuffle_swap_t shufflexyz[3], | |||||
| const float difl, | |||||
| const float extmax, | |||||
| const int node_addr, | |||||
| const uint visibility, | |||||
| float dist[2]) | |||||
| { | |||||
| float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); | |||||
| if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { | |||||
| return bvh_unaligned_node_intersect_robust(kg, | |||||
| P, | |||||
| dir, | |||||
| isect_near, | |||||
| isect_far, | |||||
| difl, | |||||
| node_addr, | |||||
| visibility, | |||||
| dist); | |||||
| } | |||||
| else { | |||||
| return bvh_aligned_node_intersect_robust(kg, | |||||
| P, | |||||
| dir, | |||||
| tsplat, | |||||
| Psplat, | |||||
| idirsplat, | |||||
| shufflexyz, | |||||
| difl, | |||||
| extmax, | |||||
| node_addr, | |||||
| visibility, | |||||
| dist); | |||||
| } | |||||
| } | |||||
| #endif /* !defined(__KERNEL_SSE2__) */ | #endif /* !defined(__KERNEL_SSE2__) */ | ||||