Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
| Show All 18 Lines | |||||
| # include "nanovdb/util/SampleFromVoxels.h" | # include "nanovdb/util/SampleFromVoxels.h" | ||||
| #endif | #endif | ||||
| /* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ | /* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */ | ||||
| ccl_device float cubic_w0(float a) | ccl_device float cubic_w0(float a) | ||||
| { | { | ||||
| return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); | return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); | ||||
| } | } | ||||
| ccl_device float cubic_w1(float a) | ccl_device float cubic_w1(float a) | ||||
| { | { | ||||
| return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); | return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); | ||||
| } | } | ||||
| ccl_device float cubic_w2(float a) | ccl_device float cubic_w2(float a) | ||||
| { | { | ||||
| return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); | return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); | ||||
| } | } | ||||
| ccl_device float cubic_w3(float a) | ccl_device float cubic_w3(float a) | ||||
| { | { | ||||
| return (1.0f / 6.0f) * (a * a * a); | return (1.0f / 6.0f) * (a * a * a); | ||||
| } | } | ||||
| /* g0 and g1 are the two amplitude functions. */ | /* g0 and g1 are the two amplitude functions. */ | ||||
| ccl_device float cubic_g0(float a) | ccl_device float cubic_g0(float a) | ||||
| { | { | ||||
| return cubic_w0(a) + cubic_w1(a); | return cubic_w0(a) + cubic_w1(a); | ||||
| } | } | ||||
| ccl_device float cubic_g1(float a) | ccl_device float cubic_g1(float a) | ||||
| { | { | ||||
| return cubic_w2(a) + cubic_w3(a); | return cubic_w2(a) + cubic_w3(a); | ||||
| } | } | ||||
| /* h0 and h1 are the two offset functions */ | /* h0 and h1 are the two offset functions */ | ||||
| ccl_device float cubic_h0(float a) | ccl_device float cubic_h0(float a) | ||||
| { | { | ||||
| /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ | return (cubic_w1(a) / cubic_g0(a)) - 1.0f; | ||||
| return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f; | |||||
| } | } | ||||
| ccl_device float cubic_h1(float a) | ccl_device float cubic_h1(float a) | ||||
| { | { | ||||
| return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; | return (cubic_w3(a) / cubic_g1(a)) + 1.0f; | ||||
| } | } | ||||
| /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ | /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */ | ||||
| template<typename T> | template<typename T> | ||||
| ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y) | ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo &info, float x, float y) | ||||
| { | { | ||||
| CUtexObject tex = (CUtexObject)info.data; | CUtexObject tex = (CUtexObject)info.data; | ||||
| x = (x * info.width) - 0.5f; | x = (x * info.width) - 0.5f; | ||||
| y = (y * info.height) - 0.5f; | y = (y * info.height) - 0.5f; | ||||
| float px = floor(x); | float px = floor(x); | ||||
| float py = floor(y); | float py = floor(y); | ||||
| float fx = x - px; | float fx = x - px; | ||||
| float fy = y - py; | float fy = y - py; | ||||
| float g0x = cubic_g0(fx); | float g0x = cubic_g0(fx); | ||||
| float g1x = cubic_g1(fx); | float g1x = cubic_g1(fx); | ||||
| float x0 = (px + cubic_h0(fx)) / info.width; | /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ | ||||
| float x1 = (px + cubic_h1(fx)) / info.width; | float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; | ||||
| float y0 = (py + cubic_h0(fy)) / info.height; | float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; | ||||
| float y1 = (py + cubic_h1(fy)) / info.height; | float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; | ||||
| float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; | |||||
| return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) + | return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) + | ||||
| cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1)); | cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1)); | ||||
| } | } | ||||
| /* Fast tricubic texture lookup using 8 trilinear lookups. */ | /* Fast tricubic texture lookup using 8 trilinear lookups. */ | ||||
| template<typename T> | template<typename T> | ||||
| ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo &info, float x, float y, float z) | ccl_device T kernel_tex_image_interp_tricubic(const TextureInfo &info, float x, float y, float z) | ||||
| { | { | ||||
| CUtexObject tex = (CUtexObject)info.data; | CUtexObject tex = (CUtexObject)info.data; | ||||
| x = (x * info.width) - 0.5f; | x = (x * info.width) - 0.5f; | ||||
| y = (y * info.height) - 0.5f; | y = (y * info.height) - 0.5f; | ||||
| z = (z * info.depth) - 0.5f; | z = (z * info.depth) - 0.5f; | ||||
| float px = floor(x); | float px = floor(x); | ||||
| float py = floor(y); | float py = floor(y); | ||||
| float pz = floor(z); | float pz = floor(z); | ||||
| float fx = x - px; | float fx = x - px; | ||||
| float fy = y - py; | float fy = y - py; | ||||
| float fz = z - pz; | float fz = z - pz; | ||||
| float g0x = cubic_g0(fx); | float g0x = cubic_g0(fx); | ||||
| float g1x = cubic_g1(fx); | float g1x = cubic_g1(fx); | ||||
| float g0y = cubic_g0(fy); | float g0y = cubic_g0(fy); | ||||
| float g1y = cubic_g1(fy); | float g1y = cubic_g1(fy); | ||||
| float g0z = cubic_g0(fz); | float g0z = cubic_g0(fz); | ||||
| float g1z = cubic_g1(fz); | float g1z = cubic_g1(fz); | ||||
| float x0 = (px + cubic_h0(fx)) / info.width; | /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ | ||||
| float x1 = (px + cubic_h1(fx)) / info.width; | float x0 = (px + cubic_h0(fx) + 0.5f) / info.width; | ||||
| float y0 = (py + cubic_h0(fy)) / info.height; | float x1 = (px + cubic_h1(fx) + 0.5f) / info.width; | ||||
| float y1 = (py + cubic_h1(fy)) / info.height; | float y0 = (py + cubic_h0(fy) + 0.5f) / info.height; | ||||
| float z0 = (pz + cubic_h0(fz)) / info.depth; | float y1 = (py + cubic_h1(fy) + 0.5f) / info.height; | ||||
| float z1 = (pz + cubic_h1(fz)) / info.depth; | float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth; | ||||
| float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth; | |||||
| return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) + | return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) + | ||||
| g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) + | g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) + | ||||
| g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) + | g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) + | ||||
| g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1))); | g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1))); | ||||
| } | } | ||||
| #ifdef WITH_NANOVDB | #ifdef WITH_NANOVDB | ||||
| template<typename T, typename S> | |||||
| ccl_device T kernel_tex_image_interp_tricubic_nanovdb(S &s, float x, float y, float z) | |||||
| { | |||||
| float px = floor(x); | |||||
| float py = floor(y); | |||||
| float pz = floor(z); | |||||
| float fx = x - px; | |||||
| float fy = y - py; | |||||
| float fz = z - pz; | |||||
| float g0x = cubic_g0(fx); | |||||
| float g1x = cubic_g1(fx); | |||||
| float g0y = cubic_g0(fy); | |||||
| float g1y = cubic_g1(fy); | |||||
| float g0z = cubic_g0(fz); | |||||
| float g1z = cubic_g1(fz); | |||||
| float x0 = px + cubic_h0(fx); | |||||
| float x1 = px + cubic_h1(fx); | |||||
| float y0 = py + cubic_h0(fy); | |||||
| float y1 = py + cubic_h1(fy); | |||||
| float z0 = pz + cubic_h0(fz); | |||||
| float z1 = pz + cubic_h1(fz); | |||||
| using namespace nanovdb; | |||||
| return g0z * (g0y * (g0x * s(Vec3f(x0, y0, z0)) + g1x * s(Vec3f(x1, y0, z0))) + | |||||
| g1y * (g0x * s(Vec3f(x0, y1, z0)) + g1x * s(Vec3f(x1, y1, z0)))) + | |||||
| g1z * (g0y * (g0x * s(Vec3f(x0, y0, z1)) + g1x * s(Vec3f(x1, y0, z1))) + | |||||
| g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1)))); | |||||
| } | |||||
| template<typename T> | template<typename T> | ||||
| ccl_device_inline T kernel_tex_image_interp_nanovdb( | ccl_device_inline T kernel_tex_image_interp_nanovdb( | ||||
| const TextureInfo &info, float x, float y, float z, uint interpolation) | const TextureInfo &info, float x, float y, float z, uint interpolation) | ||||
| { | { | ||||
| const nanovdb::Vec3f xyz(x, y, z); | using namespace nanovdb; | ||||
| nanovdb::NanoGrid<T> *const grid = (nanovdb::NanoGrid<T> *)info.data; | typedef ReadAccessor<NanoRoot<T>> ReadAccessorT; | ||||
| const nanovdb::NanoRoot<T> &root = grid->tree().root(); | |||||
| NanoGrid<T> *const grid = (NanoGrid<T> *)info.data; | |||||
| const NanoRoot<T> &root = grid->tree().root(); | |||||
| typedef nanovdb::ReadAccessor<nanovdb::NanoRoot<T>> ReadAccessorT; | |||||
| switch (interpolation) { | switch (interpolation) { | ||||
| case INTERPOLATION_CLOSEST: | case INTERPOLATION_CLOSEST: | ||||
| return nanovdb::SampleFromVoxels<ReadAccessorT, 0, false>(root)(xyz); | return NearestNeighborSampler<ReadAccessorT, false>(root)(Vec3f(x, y, z)); | ||||
| case INTERPOLATION_LINEAR: | case INTERPOLATION_LINEAR: | ||||
| return nanovdb::SampleFromVoxels<ReadAccessorT, 1, false>(root)(xyz); | return TrilinearSampler<ReadAccessorT, false>(root)(Vec3f(x - 0.5f, y - 0.5f, z - 0.5f)); | ||||
| default: | default: | ||||
| return nanovdb::SampleFromVoxels<ReadAccessorT, 3, false>(root)(xyz); | TrilinearSampler<ReadAccessorT, false> s(root); | ||||
| return kernel_tex_image_interp_tricubic_nanovdb<T>(s, x - 0.5f, y - 0.5f, z - 0.5f); | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
| ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) | ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) | ||||
| { | { | ||||
| const TextureInfo &info = kernel_tex_fetch(__texture_info, id); | const TextureInfo &info = kernel_tex_fetch(__texture_info, id); | ||||
| ▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { | ||||
| nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>( | nanovdb::Vec3f f = kernel_tex_image_interp_nanovdb<nanovdb::Vec3f>( | ||||
| info, x, y, z, interpolation); | info, x, y, z, interpolation); | ||||
| return make_float4(f[0], f[1], f[2], 1.0f); | return make_float4(f[0], f[1], f[2], 1.0f); | ||||
| } | } | ||||
| #endif | #endif | ||||
| if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || | if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || | ||||
| texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { | texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { | ||||
| if (interpolation == INTERPOLATION_CUBIC) { | if (interpolation == INTERPOLATION_CUBIC) { | ||||
| return kernel_tex_image_interp_bicubic_3d<float4>(info, x, y, z); | return kernel_tex_image_interp_tricubic<float4>(info, x, y, z); | ||||
| } | } | ||||
| else { | else { | ||||
| CUtexObject tex = (CUtexObject)info.data; | CUtexObject tex = (CUtexObject)info.data; | ||||
| return tex3D<float4>(tex, x, y, z); | return tex3D<float4>(tex, x, y, z); | ||||
| } | } | ||||
| } | } | ||||
| else { | else { | ||||
| float f; | float f; | ||||
| if (interpolation == INTERPOLATION_CUBIC) { | if (interpolation == INTERPOLATION_CUBIC) { | ||||
| f = kernel_tex_image_interp_bicubic_3d<float>(info, x, y, z); | f = kernel_tex_image_interp_tricubic<float>(info, x, y, z); | ||||
| } | } | ||||
| else { | else { | ||||
| CUtexObject tex = (CUtexObject)info.data; | CUtexObject tex = (CUtexObject)info.data; | ||||
| f = tex3D<float>(tex, x, y, z); | f = tex3D<float>(tex, x, y, z); | ||||
| } | } | ||||
| return make_float4(f, f, f, 1.0f); | return make_float4(f, f, f, 1.0f); | ||||
| } | } | ||||
| } | } | ||||