Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/svm/svm_image.h
| Show All 12 Lines | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| CCL_NAMESPACE_BEGIN | CCL_NAMESPACE_BEGIN | ||||
| /* Float4 textures on various devices. */ | /* Float4 textures on various devices. */ | ||||
| #if defined(__KERNEL_CPU__) | #if defined(__KERNEL_CPU__) | ||||
| #define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CPU | # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CPU | ||||
| #elif defined(__KERNEL_CUDA__) | #elif defined(__KERNEL_CUDA__) | ||||
| # if __CUDA_ARCH__ < 300 | |||||
| #define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CUDA | # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CUDA | ||||
| #else | # else | ||||
| # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER | |||||
| # endif | |||||
| #else | |||||
| #define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_OPENCL | # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_IMAGES_OPENCL | ||||
| #endif | #endif | ||||
| #ifdef __KERNEL_OPENCL__ | #ifdef __KERNEL_OPENCL__ | ||||
| /* For OpenCL all images are packed in a single array, and we do manual lookup | /* For OpenCL all images are packed in a single array, and we do manual lookup | ||||
| * and interpolation. */ | * and interpolation. */ | ||||
| ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) | ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) | ||||
| ▲ Show 20 Lines • Show All 112 Lines • ▼ Show 20 Lines | # ifdef __KERNEL_SSE2__ | ||||
| float4 &r = (float4 &)r_ssef; | float4 &r = (float4 &)r_ssef; | ||||
| r = kernel_tex_image_interp(id, x, y); | r = kernel_tex_image_interp(id, x, y); | ||||
| # else | # else | ||||
| float4 r = kernel_tex_image_interp(id, x, y); | float4 r = kernel_tex_image_interp(id, x, y); | ||||
| # endif | # endif | ||||
| #else | #else | ||||
| float4 r; | float4 r; | ||||
| # if __CUDA_ARCH__ < 300 | |||||
| /* not particularly proud of this massive switch, what are the | /* not particularly proud of this massive switch, what are the | ||||
| * alternatives? | * alternatives? | ||||
| * - use a single big 1D texture, and do our own lookup/filtering | * - use a single big 1D texture, and do our own lookup/filtering | ||||
| * - group by size and use a 3d texture, performance impact | * - group by size and use a 3d texture, performance impact | ||||
| * - group into larger texture with some padding for correct lerp | * - group into larger texture with some padding for correct lerp | ||||
| * | * | ||||
| * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler), | * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler), | ||||
| * and we cannot use all since we still need some for other storage */ | * and we cannot use all since we still need some for other storage */ | ||||
| ▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines | switch(id) { | ||||
| case 85: r = kernel_tex_image_interp(__tex_image_byte4_085, x, y); break; | case 85: r = kernel_tex_image_interp(__tex_image_byte4_085, x, y); break; | ||||
| case 86: r = kernel_tex_image_interp(__tex_image_byte4_086, x, y); break; | case 86: r = kernel_tex_image_interp(__tex_image_byte4_086, x, y); break; | ||||
| case 87: r = kernel_tex_image_interp(__tex_image_byte4_087, x, y); break; | case 87: r = kernel_tex_image_interp(__tex_image_byte4_087, x, y); break; | ||||
| case 88: r = kernel_tex_image_interp(__tex_image_byte4_088, x, y); break; | case 88: r = kernel_tex_image_interp(__tex_image_byte4_088, x, y); break; | ||||
| case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break; | case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break; | ||||
| case 90: r = kernel_tex_image_interp(__tex_image_byte4_090, x, y); break; | case 90: r = kernel_tex_image_interp(__tex_image_byte4_090, x, y); break; | ||||
| case 91: r = kernel_tex_image_interp(__tex_image_byte4_091, x, y); break; | case 91: r = kernel_tex_image_interp(__tex_image_byte4_091, x, y); break; | ||||
| case 92: r = kernel_tex_image_interp(__tex_image_byte4_092, x, y); break; | case 92: r = kernel_tex_image_interp(__tex_image_byte4_092, x, y); break; | ||||
| # if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300) | |||||
| case 93: r = kernel_tex_image_interp(__tex_image_byte4_093, x, y); break; | |||||
| case 94: r = kernel_tex_image_interp(__tex_image_byte4_094, x, y); break; | |||||
| case 95: r = kernel_tex_image_interp(__tex_image_byte4_095, x, y); break; | |||||
| case 96: r = kernel_tex_image_interp(__tex_image_byte4_096, x, y); break; | |||||
| case 97: r = kernel_tex_image_interp(__tex_image_byte4_097, x, y); break; | |||||
| case 98: r = kernel_tex_image_interp(__tex_image_byte4_098, x, y); break; | |||||
| case 99: r = kernel_tex_image_interp(__tex_image_byte4_099, x, y); break; | |||||
| case 100: r = kernel_tex_image_interp(__tex_image_byte4_100, x, y); break; | |||||
| case 101: r = kernel_tex_image_interp(__tex_image_byte4_101, x, y); break; | |||||
| case 102: r = kernel_tex_image_interp(__tex_image_byte4_102, x, y); break; | |||||
| case 103: r = kernel_tex_image_interp(__tex_image_byte4_103, x, y); break; | |||||
| case 104: r = kernel_tex_image_interp(__tex_image_byte4_104, x, y); break; | |||||
| case 105: r = kernel_tex_image_interp(__tex_image_byte4_105, x, y); break; | |||||
| case 106: r = kernel_tex_image_interp(__tex_image_byte4_106, x, y); break; | |||||
| case 107: r = kernel_tex_image_interp(__tex_image_byte4_107, x, y); break; | |||||
| case 108: r = kernel_tex_image_interp(__tex_image_byte4_108, x, y); break; | |||||
| case 109: r = kernel_tex_image_interp(__tex_image_byte4_109, x, y); break; | |||||
| case 110: r = kernel_tex_image_interp(__tex_image_byte4_110, x, y); break; | |||||
| case 111: r = kernel_tex_image_interp(__tex_image_byte4_111, x, y); break; | |||||
| case 112: r = kernel_tex_image_interp(__tex_image_byte4_112, x, y); break; | |||||
| case 113: r = kernel_tex_image_interp(__tex_image_byte4_113, x, y); break; | |||||
| case 114: r = kernel_tex_image_interp(__tex_image_byte4_114, x, y); break; | |||||
| case 115: r = kernel_tex_image_interp(__tex_image_byte4_115, x, y); break; | |||||
| case 116: r = kernel_tex_image_interp(__tex_image_byte4_116, x, y); break; | |||||
| case 117: r = kernel_tex_image_interp(__tex_image_byte4_117, x, y); break; | |||||
| case 118: r = kernel_tex_image_interp(__tex_image_byte4_118, x, y); break; | |||||
| case 119: r = kernel_tex_image_interp(__tex_image_byte4_119, x, y); break; | |||||
| case 120: r = kernel_tex_image_interp(__tex_image_byte4_120, x, y); break; | |||||
| case 121: r = kernel_tex_image_interp(__tex_image_byte4_121, x, y); break; | |||||
| case 122: r = kernel_tex_image_interp(__tex_image_byte4_122, x, y); break; | |||||
| case 123: r = kernel_tex_image_interp(__tex_image_byte4_123, x, y); break; | |||||
| case 124: r = kernel_tex_image_interp(__tex_image_byte4_124, x, y); break; | |||||
| case 125: r = kernel_tex_image_interp(__tex_image_byte4_125, x, y); break; | |||||
| case 126: r = kernel_tex_image_interp(__tex_image_byte4_126, x, y); break; | |||||
| case 127: r = kernel_tex_image_interp(__tex_image_byte4_127, x, y); break; | |||||
| case 128: r = kernel_tex_image_interp(__tex_image_byte4_128, x, y); break; | |||||
| case 129: r = kernel_tex_image_interp(__tex_image_byte4_129, x, y); break; | |||||
| case 130: r = kernel_tex_image_interp(__tex_image_byte4_130, x, y); break; | |||||
| case 131: r = kernel_tex_image_interp(__tex_image_byte4_131, x, y); break; | |||||
| case 132: r = kernel_tex_image_interp(__tex_image_byte4_132, x, y); break; | |||||
| case 133: r = kernel_tex_image_interp(__tex_image_byte4_133, x, y); break; | |||||
| case 134: r = kernel_tex_image_interp(__tex_image_byte4_134, x, y); break; | |||||
| case 135: r = kernel_tex_image_interp(__tex_image_byte4_135, x, y); break; | |||||
| case 136: r = kernel_tex_image_interp(__tex_image_byte4_136, x, y); break; | |||||
| case 137: r = kernel_tex_image_interp(__tex_image_byte4_137, x, y); break; | |||||
| case 138: r = kernel_tex_image_interp(__tex_image_byte4_138, x, y); break; | |||||
| case 139: r = kernel_tex_image_interp(__tex_image_byte4_139, x, y); break; | |||||
| case 140: r = kernel_tex_image_interp(__tex_image_byte4_140, x, y); break; | |||||
| case 141: r = kernel_tex_image_interp(__tex_image_byte4_141, x, y); break; | |||||
| case 142: r = kernel_tex_image_interp(__tex_image_byte4_142, x, y); break; | |||||
| case 143: r = kernel_tex_image_interp(__tex_image_byte4_143, x, y); break; | |||||
| case 144: r = kernel_tex_image_interp(__tex_image_byte4_144, x, y); break; | |||||
| case 145: r = kernel_tex_image_interp(__tex_image_byte4_145, x, y); break; | |||||
| case 146: r = kernel_tex_image_interp(__tex_image_byte4_146, x, y); break; | |||||
| case 147: r = kernel_tex_image_interp(__tex_image_byte4_147, x, y); break; | |||||
| case 148: r = kernel_tex_image_interp(__tex_image_byte4_148, x, y); break; | |||||
| case 149: r = kernel_tex_image_interp(__tex_image_byte4_149, x, y); break; | |||||
| case 150: r = kernel_tex_image_interp(__tex_image_byte4_150, x, y); break; | |||||
| # endif | |||||
| default: | default: | ||||
| kernel_assert(0); | kernel_assert(0); | ||||
| return make_float4(0.0f, 0.0f, 0.0f, 0.0f); | return make_float4(0.0f, 0.0f, 0.0f, 0.0f); | ||||
| } | } | ||||
| # else | |||||
| CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); | |||||
| if(id < 2048) /* TODO(dingto): Make this a variable */ | |||||
| r = kernel_tex_image_interp_float4(tex, x, y); | |||||
| else { | |||||
| float f = kernel_tex_image_interp_float(tex, x, y); | |||||
| r = make_float4(f, f, f, 1.0); | |||||
| } | |||||
| # endif | |||||
| #endif | #endif | ||||
| #ifdef __KERNEL_SSE2__ | #ifdef __KERNEL_SSE2__ | ||||
| float alpha = r.w; | float alpha = r.w; | ||||
| if(use_alpha && alpha != 1.0f && alpha != 0.0f) { | if(use_alpha && alpha != 1.0f && alpha != 0.0f) { | ||||
| r_ssef = r_ssef / ssef(alpha); | r_ssef = r_ssef / ssef(alpha); | ||||
| if(id >= TEX_NUM_FLOAT4_IMAGES) | if(id >= TEX_NUM_FLOAT4_IMAGES) | ||||
| ▲ Show 20 Lines • Show All 190 Lines • Show Last 20 Lines | |||||