Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_cuda.cpp
| Show First 20 Lines • Show All 764 Lines • ▼ Show 20 Lines | else { | ||||
| cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")); | cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")); | ||||
| } | } | ||||
| /* do tasks in smaller chunks, so we can cancel it */ | /* do tasks in smaller chunks, so we can cancel it */ | ||||
| const int shader_chunk_size = 65536; | const int shader_chunk_size = 65536; | ||||
| const int start = task.shader_x; | const int start = task.shader_x; | ||||
| const int end = task.shader_x + task.shader_w; | const int end = task.shader_x + task.shader_w; | ||||
| for(int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| if(task.get_cancel()) | if(task.get_cancel()) | ||||
| break; | break; | ||||
| for(int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { | |||||
| int shader_w = min(shader_chunk_size, end - shader_x); | int shader_w = min(shader_chunk_size, end - shader_x); | ||||
| for(int sample = 0; sample < task.num_samples; sample++) { | |||||
| /* pass in parameters */ | /* pass in parameters */ | ||||
| int offset = 0; | int offset = 0; | ||||
| cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); | cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); | ||||
| offset += sizeof(d_input); | offset += sizeof(d_input); | ||||
| cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); | cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); | ||||
| offset += sizeof(d_output); | offset += sizeof(d_output); | ||||
| Show All 22 Lines | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; | int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; | ||||
| cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); | cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); | ||||
| cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); | cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); | ||||
| cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); | cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); | ||||
| cuda_assert(cuCtxSynchronize()); | cuda_assert(cuCtxSynchronize()); | ||||
| } | } | ||||
| if(task.update_progress_sample) | |||||
sergey: Suggestion: Make DeviceTask::update_progress use a pointer to render tile instead of a… | |||||
| task.update_progress_sample(); | |||||
| } | } | ||||
| cuda_pop_context(); | cuda_pop_context(); | ||||
| } | } | ||||
| CUdeviceptr map_pixels(device_ptr mem) | CUdeviceptr map_pixels(device_ptr mem) | ||||
| { | { | ||||
| if(!background) { | if(!background) { | ||||
| ▲ Show 20 Lines • Show All 237 Lines • ▼ Show 20 Lines | #endif | ||||
| public: | public: | ||||
| CUDADeviceTask(CUDADevice *device, DeviceTask& task) | CUDADeviceTask(CUDADevice *device, DeviceTask& task) | ||||
| : DeviceTask(task) | : DeviceTask(task) | ||||
| { | { | ||||
| run = function_bind(&CUDADevice::thread_run, device, this); | run = function_bind(&CUDADevice::thread_run, device, this); | ||||
| } | } | ||||
| }; | }; | ||||
| int task_count(DeviceTask& task) | |||||
| { | |||||
| return 1; | |||||
| } | |||||
| void task_add(DeviceTask& task) | void task_add(DeviceTask& task) | ||||
| { | { | ||||
| if(task.type == DeviceTask::FILM_CONVERT) { | if(task.type == DeviceTask::FILM_CONVERT) { | ||||
| /* must be done in main thread due to opengl access */ | /* must be done in main thread due to opengl access */ | ||||
| film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); | film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); | ||||
| cuda_push_context(); | cuda_push_context(); | ||||
| cuda_assert(cuCtxSynchronize()); | cuda_assert(cuCtxSynchronize()); | ||||
| ▲ Show 20 Lines • Show All 78 Lines • Show Last 20 Lines | |||||
Suggestion: Make DeviceTask::update_progress take a pointer to the render tile instead of a reference, and call it from here with a NULL tile. This way you wouldn't have to worry about calling update too often.