Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/kernel_compat_cuda.h
| Show First 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | |||||
| typedef texture<uint4, 1> texture_uint4; | typedef texture<uint4, 1> texture_uint4; | ||||
| typedef texture<uchar4, 1> texture_uchar4; | typedef texture<uchar4, 1> texture_uchar4; | ||||
| typedef texture<float4, 2> texture_image_float4; | typedef texture<float4, 2> texture_image_float4; | ||||
| typedef texture<float4, 3> texture_image3d_float4; | typedef texture<float4, 3> texture_image3d_float4; | ||||
| typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; | typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; | ||||
| /* Macros to handle different memory storage on different devices */ | /* Macros to handle different memory storage on different devices */ | ||||
| /* In order to use full 6GB of memory on Titan cards, use arrays instead | /* On Fermi cards (4xx and 5xx), we use regular textures for both data and images. | ||||
| * of textures. On earlier cards this seems slower, but on Titan it is | * On Kepler (6xx) and above, we use Bindless Textures for images and arrays for data. | ||||
| * actually slightly faster in tests. */ | * | ||||
| * Arrays are necessary in order to use the full VRAM on newer cards, and they are also slightly faster. | |||||
| * Using arrays on Fermi turned out to be slower. */ | |||||
| /* Fermi */ | |||||
| #if __CUDA_ARCH__ < 300 | #if __CUDA_ARCH__ < 300 | ||||
| # define __KERNEL_CUDA_TEX_STORAGE__ | # define __KERNEL_CUDA_TEX_STORAGE__ | ||||
| #endif | |||||
| #ifdef __KERNEL_CUDA_TEX_STORAGE__ | |||||
| # define kernel_tex_fetch(t, index) tex1Dfetch(t, index) | # define kernel_tex_fetch(t, index) tex1Dfetch(t, index) | ||||
| # define kernel_tex_image_interp(t, x, y) tex2D(t, x, y) | |||||
| # define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z) | |||||
| /* Kepler and above */ | |||||
| #else | #else | ||||
| # define kernel_tex_fetch(t, index) t[(index)] | # define kernel_tex_fetch(t, index) t[(index)] | ||||
| # define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y) | |||||
| # define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y) | |||||
| # define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z) | |||||
| # define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z) | |||||
| #endif | #endif | ||||
| #define kernel_tex_image_interp(t, x, y) tex2D(t, x, y) | |||||
| #define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z) | |||||
| #define kernel_data __data | #define kernel_data __data | ||||
| /* Use fast math functions */ | /* Use fast math functions */ | ||||
| #define cosf(x) __cosf(((float)(x))) | #define cosf(x) __cosf(((float)(x))) | ||||
| #define sinf(x) __sinf(((float)(x))) | #define sinf(x) __sinf(((float)(x))) | ||||
| #define powf(x, y) __powf(((float)(x)), ((float)(y))) | #define powf(x, y) __powf(((float)(x)), ((float)(y))) | ||||
| #define tanf(x) __tanf(((float)(x))) | #define tanf(x) __tanf(((float)(x))) | ||||
| #define logf(x) __logf(((float)(x))) | #define logf(x) __logf(((float)(x))) | ||||
| #define expf(x) __expf(((float)(x))) | #define expf(x) __expf(((float)(x))) | ||||
| #endif /* __KERNEL_COMPAT_CUDA_H__ */ | #endif /* __KERNEL_COMPAT_CUDA_H__ */ | ||||