Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/geom/geom_curve_intersect.h
| Show All 29 Lines | ccl_device_forceinline bool cardinal_curve_intersect( | ||||
| KernelGlobals *kg, | KernelGlobals *kg, | ||||
| Intersection *isect, | Intersection *isect, | ||||
| const float3 ccl_ref P, | const float3 ccl_ref P, | ||||
| const float3 ccl_ref dir, | const float3 ccl_ref dir, | ||||
| uint visibility, | uint visibility, | ||||
| int object, | int object, | ||||
| int curveAddr, | int curveAddr, | ||||
| float time, | float time, | ||||
| int type, | int type) | ||||
| uint *lcg_state, | |||||
| float difl, | |||||
| float extmax) | |||||
| { | { | ||||
| const bool is_curve_primitive = (type & PRIMITIVE_CURVE); | const bool is_curve_primitive = (type & PRIMITIVE_CURVE); | ||||
| if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { | if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { | ||||
| const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); | const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); | ||||
| if(time < prim_time.x || time > prim_time.y) { | if(time < prim_time.x || time > prim_time.y) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 167 Lines • ▼ Show 20 Lines | #endif | ||||
| float upper, lower; | float upper, lower; | ||||
| float zextrem[4]; | float zextrem[4]; | ||||
| curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); | curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); | ||||
| if(lower - r_curr > isect->t || upper + r_curr < epsilon) | if(lower - r_curr > isect->t || upper + r_curr < epsilon) | ||||
| return false; | return false; | ||||
| /* minimum width extension */ | /* minimum width extension */ | ||||
| float mw_extension = min(difl * fabsf(upper), extmax); | |||||
| float r_ext = mw_extension + r_curr; | |||||
| float xextrem[4]; | float xextrem[4]; | ||||
| curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); | curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); | ||||
| if(lower > r_ext || upper < -r_ext) | if(lower > r_curr || upper < -r_curr) | ||||
| return false; | return false; | ||||
| float yextrem[4]; | float yextrem[4]; | ||||
| curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); | curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); | ||||
| if(lower > r_ext || upper < -r_ext) | if(lower > r_curr || upper < -r_curr) | ||||
| return false; | return false; | ||||
| /* setup recurrent loop */ | /* setup recurrent loop */ | ||||
| int level = 1 << depth; | int level = 1 << depth; | ||||
| int tree = 0; | int tree = 0; | ||||
| float resol = 1.0f / (float)level; | float resol = 1.0f / (float)level; | ||||
| bool hit = false; | bool hit = false; | ||||
| ▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | if(zextrem[2] >= i_st && zextrem[2] <= i_en) { | ||||
| bminz = min(bminz,zextrem[3]); | bminz = min(bminz,zextrem[3]); | ||||
| bmaxz = max(bmaxz,zextrem[3]); | bmaxz = max(bmaxz,zextrem[3]); | ||||
| } | } | ||||
| float r1 = r_st + (r_en - r_st) * i_st; | float r1 = r_st + (r_en - r_st) * i_st; | ||||
| float r2 = r_st + (r_en - r_st) * i_en; | float r2 = r_st + (r_en - r_st) * i_en; | ||||
| r_curr = max(r1, r2); | r_curr = max(r1, r2); | ||||
| mw_extension = min(difl * fabsf(bmaxz), extmax); | if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_curr|| bmaxx < -r_curr|| bminy > r_curr|| bmaxy < -r_curr) { | ||||
| float r_ext = mw_extension + r_curr; | |||||
| float coverage = 1.0f; | |||||
| if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { | |||||
| /* the bounding box does not overlap the square centered at O */ | /* the bounding box does not overlap the square centered at O */ | ||||
| tree += level; | tree += level; | ||||
| level = tree & -tree; | level = tree & -tree; | ||||
| } | } | ||||
| else if(level == 1) { | else if(level == 1) { | ||||
| /* the maximum recursion depth is reached. | /* the maximum recursion depth is reached. | ||||
| * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. | * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. | ||||
| ▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | #endif | ||||
| if(dot(tg, dp_en) < 0) | if(dot(tg, dp_en) < 0) | ||||
| dp_en *= -1; | dp_en *= -1; | ||||
| if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { | if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { | ||||
| tree++; | tree++; | ||||
| level = tree & -tree; | level = tree & -tree; | ||||
| continue; | continue; | ||||
| } | } | ||||
| /* compute coverage */ | if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_curr * r_curr || p_curr.z <= epsilon || isect->t < p_curr.z) { | ||||
| float r_ext = r_curr; | |||||
| coverage = 1.0f; | |||||
| if(difl != 0.0f) { | |||||
| mw_extension = min(difl * fabsf(bmaxz), extmax); | |||||
| r_ext = mw_extension + r_curr; | |||||
| #ifdef __KERNEL_SSE__ | |||||
| const float3 p_curr_sq = p_curr * p_curr; | |||||
| const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128))); | |||||
| float d = dxxx.x; | |||||
| #else | |||||
| float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); | |||||
| #endif | |||||
| float d0 = d - r_curr; | |||||
| float d1 = d + r_curr; | |||||
| float inv_mw_extension = 1.0f/mw_extension; | |||||
| if(d0 >= 0) | |||||
| coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f; | |||||
| else // inside | |||||
| coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f; | |||||
| } | |||||
| if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { | |||||
| tree++; | tree++; | ||||
| level = tree & -tree; | level = tree & -tree; | ||||
| continue; | continue; | ||||
| } | } | ||||
| t = p_curr.z; | t = p_curr.z; | ||||
| /* stochastic fade from minimum width */ | |||||
| if(difl != 0.0f && lcg_state) { | |||||
| if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) | |||||
| return hit; | |||||
| } | |||||
| } | } | ||||
| else { | else { | ||||
| float l = len(p_en - p_st); | float l = len(p_en - p_st); | ||||
| /* minimum width extension */ | |||||
| float or1 = r1; | |||||
| float or2 = r2; | |||||
| if(difl != 0.0f) { | |||||
| mw_extension = min(len(p_st - P) * difl, extmax); | |||||
| or1 = r1 < mw_extension ? mw_extension : r1; | |||||
| mw_extension = min(len(p_en - P) * difl, extmax); | |||||
| or2 = r2 < mw_extension ? mw_extension : r2; | |||||
| } | |||||
| /* --- */ | |||||
| float invl = 1.0f/l; | float invl = 1.0f/l; | ||||
| float3 tg = (p_en - p_st) * invl; | float3 tg = (p_en - p_st) * invl; | ||||
| gd = (or2 - or1) * invl; | gd = (r2 - r1) * invl; | ||||
| float difz = -dot(p_st,tg); | float difz = -dot(p_st,tg); | ||||
| float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); | float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); | ||||
| float invcyla = 1.0f/cyla; | float invcyla = 1.0f/cyla; | ||||
| float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); | float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + r1))); | ||||
| float tcentre = -halfb*invcyla; | float tcentre = -halfb*invcyla; | ||||
| float zcentre = difz + (tg.z * tcentre); | float zcentre = difz + (tg.z * tcentre); | ||||
| float3 tdif = - p_st; | float3 tdif = - p_st; | ||||
| tdif.z += tcentre; | tdif.z += tcentre; | ||||
| float tdifz = dot(tdif,tg); | float tdifz = dot(tdif,tg); | ||||
| float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); | float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + r1))); | ||||
| float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; | float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - r1*r1 - 2*r1*tdifz*gd; | ||||
| float td = tb*tb - 4*cyla*tc; | float td = tb*tb - 4*cyla*tc; | ||||
| if(td < 0.0f) { | if(td < 0.0f) { | ||||
| tree++; | tree++; | ||||
| level = tree & -tree; | level = tree & -tree; | ||||
| continue; | continue; | ||||
| } | } | ||||
| float rootd = sqrtf(td); | float rootd = sqrtf(td); | ||||
| Show All 17 Lines | #endif | ||||
| level = tree & -tree; | level = tree & -tree; | ||||
| continue; | continue; | ||||
| } | } | ||||
| float w = (zcentre + (tg.z * correction)) * invl; | float w = (zcentre + (tg.z * correction)) * invl; | ||||
| w = saturate(w); | w = saturate(w); | ||||
| /* compute u on the curve segment */ | /* compute u on the curve segment */ | ||||
| u = i_st * (1 - w) + i_en * w; | u = i_st * (1 - w) + i_en * w; | ||||
| /* stochastic fade from minimum width */ | |||||
| if(difl != 0.0f && lcg_state) { | |||||
| r_curr = r1 + (r2 - r1) * w; | |||||
| r_ext = or1 + (or2 - or1) * w; | |||||
| coverage = r_curr/r_ext; | |||||
| if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) | |||||
| return hit; | |||||
| } | |||||
| } | } | ||||
| /* we found a new intersection */ | /* we found a new intersection */ | ||||
| #ifdef __VISIBILITY_FLAG__ | #ifdef __VISIBILITY_FLAG__ | ||||
| /* visibility flag test. we do it here under the assumption | /* visibility flag test. we do it here under the assumption | ||||
| * that most triangles are culled by node flags */ | * that most triangles are culled by node flags */ | ||||
| if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) | if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) | ||||
| #endif | #endif | ||||
| Show All 23 Lines | |||||
| ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, | ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, | ||||
| Intersection *isect, | Intersection *isect, | ||||
| float3 P, | float3 P, | ||||
| float3 direction, | float3 direction, | ||||
| uint visibility, | uint visibility, | ||||
| int object, | int object, | ||||
| int curveAddr, | int curveAddr, | ||||
| float time, | float time, | ||||
| int type, | int type) | ||||
| uint *lcg_state, | |||||
| float difl, | |||||
| float extmax) | |||||
| { | { | ||||
| /* define few macros to minimize code duplication for SSE */ | /* define few macros to minimize code duplication for SSE */ | ||||
| #ifndef __KERNEL_SSE2__ | #ifndef __KERNEL_SSE2__ | ||||
| # define len3_squared(x) len_squared(x) | # define len3_squared(x) len_squared(x) | ||||
| # define len3(x) len(x) | # define len3(x) len(x) | ||||
| # define dot3(x, y) dot(x, y) | # define dot3(x, y) dot(x, y) | ||||
| #endif | #endif | ||||
| Show All 24 Lines | if(is_curve_primitive) { | ||||
| P_curve[0] = kernel_tex_fetch(__curve_keys, k0); | P_curve[0] = kernel_tex_fetch(__curve_keys, k0); | ||||
| P_curve[1] = kernel_tex_fetch(__curve_keys, k1); | P_curve[1] = kernel_tex_fetch(__curve_keys, k1); | ||||
| } | } | ||||
| else { | else { | ||||
| int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; | int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; | ||||
| motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); | motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); | ||||
| } | } | ||||
| float or1 = P_curve[0].w; | float r1 = P_curve[0].w; | ||||
| float or2 = P_curve[1].w; | float r2 = P_curve[1].w; | ||||
| float3 p1 = float4_to_float3(P_curve[0]); | float3 p1 = float4_to_float3(P_curve[0]); | ||||
| float3 p2 = float4_to_float3(P_curve[1]); | float3 p2 = float4_to_float3(P_curve[1]); | ||||
| /* minimum width extension */ | /* minimum width extension */ | ||||
| float r1 = or1; | |||||
| float r2 = or2; | |||||
| float3 dif = P - p1; | float3 dif = P - p1; | ||||
| float3 dif_second = P - p2; | float3 dif_second = P - p2; | ||||
| if(difl != 0.0f) { | |||||
| float pixelsize = min(len3(dif) * difl, extmax); | |||||
| r1 = or1 < pixelsize ? pixelsize : or1; | |||||
| pixelsize = min(len3(dif_second) * difl, extmax); | |||||
| r2 = or2 < pixelsize ? pixelsize : or2; | |||||
| } | |||||
| /* --- */ | |||||
| float3 p21_diff = p2 - p1; | float3 p21_diff = p2 - p1; | ||||
| float3 sphere_dif1 = (dif + dif_second) * 0.5f; | float3 sphere_dif1 = (dif + dif_second) * 0.5f; | ||||
| float3 dir = direction; | float3 dir = direction; | ||||
| float sphere_b_tmp = dot3(dir, sphere_dif1); | float sphere_b_tmp = dot3(dir, sphere_dif1); | ||||
| float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; | float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; | ||||
| #else | #else | ||||
| ssef P_curve[2]; | ssef P_curve[2]; | ||||
| if(is_curve_primitive) { | if(is_curve_primitive) { | ||||
| P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); | P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); | ||||
| P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); | P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); | ||||
| } | } | ||||
| else { | else { | ||||
| int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; | int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; | ||||
| motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve); | motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve); | ||||
| } | } | ||||
| const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); | ssef r12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); | ||||
| ssef r12 = or12; | |||||
| const ssef vP = load4f(P); | const ssef vP = load4f(P); | ||||
| const ssef dif = vP - P_curve[0]; | const ssef dif = vP - P_curve[0]; | ||||
| const ssef dif_second = vP - P_curve[1]; | const ssef dif_second = vP - P_curve[1]; | ||||
| if(difl != 0.0f) { | |||||
| const ssef len1_sq = len3_squared_splat(dif); | |||||
| const ssef len2_sq = len3_squared_splat(dif_second); | |||||
| const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); | |||||
| const ssef pixelsize12 = min(len12 * difl, ssef(extmax)); | |||||
| r12 = max(or12, pixelsize12); | |||||
| } | |||||
| float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12)); | |||||
| float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12)); | float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12)); | ||||
| const ssef p21_diff = P_curve[1] - P_curve[0]; | const ssef p21_diff = P_curve[1] - P_curve[0]; | ||||
| const ssef sphere_dif1 = (dif + dif_second) * 0.5f; | const ssef sphere_dif1 = (dif + dif_second) * 0.5f; | ||||
| const ssef dir = load4f(direction); | const ssef dir = load4f(direction); | ||||
| const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1); | const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1); | ||||
| const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1); | const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1); | ||||
| #endif | #endif | ||||
| ▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines | if(t < isect->t) { | ||||
| if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { | if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { | ||||
| // backface = true; | // backface = true; | ||||
| correction = ((-tb + rootd)/(2*a)); | correction = ((-tb + rootd)/(2*a)); | ||||
| t = tcentre + correction; | t = tcentre + correction; | ||||
| z = zcentre + (dirz * correction); | z = zcentre + (dirz * correction); | ||||
| } | } | ||||
| /* stochastic fade from minimum width */ | |||||
| float adjradius = or1 + z * (or2 - or1) * invl; | |||||
| adjradius = adjradius / (r1 + z * gd); | |||||
| if(lcg_state && adjradius != 1.0f) { | |||||
| if(lcg_step_float(lcg_state) > adjradius) | |||||
| return false; | |||||
| } | |||||
| /* --- */ | |||||
| if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { | if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { | ||||
| if(flags & CURVE_KN_ENCLOSEFILTER) { | if(flags & CURVE_KN_ENCLOSEFILTER) { | ||||
| float enc_ratio = 1.01f; | float enc_ratio = 1.01f; | ||||
| if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { | if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { | ||||
| float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); | float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); | ||||
| float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; | float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; | ||||
| if(a2*c2 < 0.0f) | if(a2*c2 < 0.0f) | ||||
| ▲ Show 20 Lines • Show All 190 Lines • Show Last 20 Lines | |||||