Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_cuda.cpp
| Show First 20 Lines • Show All 107 Lines • ▼ Show 20 Lines | |||||
| }; | }; | ||||
| class CUDADevice : public Device | class CUDADevice : public Device | ||||
| { | { | ||||
| public: | public: | ||||
| DedicatedTaskPool task_pool; | DedicatedTaskPool task_pool; | ||||
| CUdevice cuDevice; | CUdevice cuDevice; | ||||
| CUcontext cuContext; | CUcontext cuContext; | ||||
| CUmodule cuModule; | CUmodule cuModule, cuFilterModule; | ||||
| map<device_ptr, bool> tex_interp_map; | map<device_ptr, bool> tex_interp_map; | ||||
| map<device_ptr, uint> tex_bindless_map; | map<device_ptr, uint> tex_bindless_map; | ||||
| int cuDevId; | int cuDevId; | ||||
| int cuDevArchitecture; | int cuDevArchitecture; | ||||
| bool first_error; | bool first_error; | ||||
| struct PixelMem { | struct PixelMem { | ||||
| GLuint cuPBO; | GLuint cuPBO; | ||||
| ▲ Show 20 Lines • Show All 166 Lines • ▼ Show 20 Lines | #define cuda_error(stmt) cuda_error_(stmt, #stmt) | ||||
| { | { | ||||
| return DebugFlags().cuda.split_kernel; | return DebugFlags().cuda.split_kernel; | ||||
| } | } | ||||
| /* Common NVCC flags which stays the same regardless of shading model, | /* Common NVCC flags which stays the same regardless of shading model, | ||||
| * kernel sources md5 and only depends on compiler or compilation settings. | * kernel sources md5 and only depends on compiler or compilation settings. | ||||
| */ | */ | ||||
| string compile_kernel_get_common_cflags( | string compile_kernel_get_common_cflags( | ||||
| const DeviceRequestedFeatures& requested_features, bool split=false) | const DeviceRequestedFeatures& requested_features, | ||||
| bool filter=false, bool split=false) | |||||
| { | { | ||||
| const int cuda_version = cuewCompilerVersion(); | const int cuda_version = cuewCompilerVersion(); | ||||
| const int machine = system_cpu_bits(); | const int machine = system_cpu_bits(); | ||||
| const string source_path = path_get("source"); | const string source_path = path_get("source"); | ||||
| const string include_path = source_path; | const string include_path = source_path; | ||||
| string cflags = string_printf("-m%d " | string cflags = string_printf("-m%d " | ||||
| "--ptxas-options=\"-v\" " | "--ptxas-options=\"-v\" " | ||||
| "--use_fast_math " | "--use_fast_math " | ||||
| "-DNVCC " | "-DNVCC " | ||||
| "-D__KERNEL_CUDA_VERSION__=%d " | "-D__KERNEL_CUDA_VERSION__=%d " | ||||
| "-I\"%s\"", | "-I\"%s\"", | ||||
| machine, | machine, | ||||
| cuda_version, | cuda_version, | ||||
| include_path.c_str()); | include_path.c_str()); | ||||
| if(use_adaptive_compilation()) { | if(!filter && use_adaptive_compilation()) { | ||||
| cflags += " " + requested_features.get_build_options(); | cflags += " " + requested_features.get_build_options(); | ||||
| } | } | ||||
| const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS"); | const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS"); | ||||
| if(extra_cflags) { | if(extra_cflags) { | ||||
| cflags += string(" ") + string(extra_cflags); | cflags += string(" ") + string(extra_cflags); | ||||
| } | } | ||||
| #ifdef WITH_CYCLES_DEBUG | #ifdef WITH_CYCLES_DEBUG | ||||
| cflags += " -D__KERNEL_DEBUG__"; | cflags += " -D__KERNEL_DEBUG__"; | ||||
| Show All 31 Lines | bool compile_check_compiler() { | ||||
| else if(cuda_version != 80) { | else if(cuda_version != 80) { | ||||
| printf("CUDA version %d.%d detected, build may succeed but only " | printf("CUDA version %d.%d detected, build may succeed but only " | ||||
| "CUDA 8.0 is officially supported.\n", | "CUDA 8.0 is officially supported.\n", | ||||
| major, minor); | major, minor); | ||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| string compile_kernel(const DeviceRequestedFeatures& requested_features, bool split=false) | string compile_kernel(const DeviceRequestedFeatures& requested_features, | ||||
| bool filter=false, bool split=false) | |||||
| { | { | ||||
| const char *name, *source; | const char *name, *source; | ||||
| if(split) { | if(filter) { | ||||
| name = "filter"; | |||||
| source = "filter.cu"; | |||||
| } | |||||
| else if(split) { | |||||
| name = "kernel_split"; | name = "kernel_split"; | ||||
| source = "kernel_split.cu"; | source = "kernel_split.cu"; | ||||
| } | } | ||||
| else { | else { | ||||
| name = "kernel"; | name = "kernel"; | ||||
| source = "kernel.cu"; | source = "kernel.cu"; | ||||
| } | } | ||||
| /* Compute cubin name. */ | /* Compute cubin name. */ | ||||
| int major, minor; | int major, minor; | ||||
| cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); | cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); | ||||
| cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); | cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); | ||||
| /* Attempt to use kernel provided with Blender. */ | /* Attempt to use kernel provided with Blender. */ | ||||
| if(!use_adaptive_compilation()) { | if(!use_adaptive_compilation()) { | ||||
| const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", | const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", | ||||
| name, major, minor)); | name, major, minor)); | ||||
| VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; | VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; | ||||
| if(path_exists(cubin)) { | if(path_exists(cubin)) { | ||||
| VLOG(1) << "Using precompiled kernel."; | VLOG(1) << "Using precompiled kernel."; | ||||
| return cubin; | return cubin; | ||||
| } | } | ||||
| } | } | ||||
| const string common_cflags = | const string common_cflags = | ||||
| compile_kernel_get_common_cflags(requested_features, split); | compile_kernel_get_common_cflags(requested_features, filter, split); | ||||
| /* Try to use locally compiled kernel. */ | /* Try to use locally compiled kernel. */ | ||||
| const string source_path = path_get("source"); | const string source_path = path_get("source"); | ||||
| const string kernel_md5 = path_files_md5_hash(source_path); | const string kernel_md5 = path_files_md5_hash(source_path); | ||||
| /* We include cflags into md5 so changing cuda toolkit or changing other | /* We include cflags into md5 so changing cuda toolkit or changing other | ||||
| * compiler command line arguments makes sure cubin gets re-built. | * compiler command line arguments makes sure cubin gets re-built. | ||||
| */ | */ | ||||
| ▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines | bool load_kernels(const DeviceRequestedFeatures& requested_features) | ||||
| if(cuContext == 0) | if(cuContext == 0) | ||||
| return false; | return false; | ||||
| /* check if GPU is supported */ | /* check if GPU is supported */ | ||||
| if(!support_device(requested_features)) | if(!support_device(requested_features)) | ||||
| return false; | return false; | ||||
| /* get kernel */ | /* get kernel */ | ||||
| string cubin = compile_kernel(requested_features, use_split_kernel()); | string cubin = compile_kernel(requested_features, false, use_split_kernel()); | ||||
| if(cubin == "") | if(cubin == "") | ||||
| return false; | return false; | ||||
| string filter_cubin = compile_kernel(requested_features, true, false); | |||||
| if(filter_cubin == "") | |||||
| return false; | |||||
| /* open module */ | /* open module */ | ||||
| cuda_push_context(); | cuda_push_context(); | ||||
| string cubin_data; | string cubin_data; | ||||
| CUresult result; | CUresult result; | ||||
| if(path_read_text(cubin, cubin_data)) | if(path_read_text(cubin, cubin_data)) | ||||
| result = cuModuleLoadData(&cuModule, cubin_data.c_str()); | result = cuModuleLoadData(&cuModule, cubin_data.c_str()); | ||||
| else | else | ||||
| result = CUDA_ERROR_FILE_NOT_FOUND; | result = CUDA_ERROR_FILE_NOT_FOUND; | ||||
| if(cuda_error_(result, "cuModuleLoad")) | if(cuda_error_(result, "cuModuleLoad")) | ||||
| cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str())); | cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str())); | ||||
| if(path_read_text(filter_cubin, cubin_data)) | |||||
| result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str()); | |||||
| else | |||||
| result = CUDA_ERROR_FILE_NOT_FOUND; | |||||
| if(cuda_error_(result, "cuModuleLoad")) | |||||
| cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str())); | |||||
| cuda_pop_context(); | cuda_pop_context(); | ||||
| return (result == CUDA_SUCCESS); | return (result == CUDA_SUCCESS); | ||||
| } | } | ||||
| void load_bindless_mapping() | void load_bindless_mapping() | ||||
| { | { | ||||
| if(info.has_bindless_textures && need_bindless_mapping) { | if(info.has_bindless_textures && need_bindless_mapping) { | ||||
| ▲ Show 20 Lines • Show All 1,375 Lines • Show Last 20 Lines | |||||