Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_cuda.cpp
| Show First 20 Lines • Show All 770 Lines • ▼ Show 20 Lines | void shader(DeviceTask& task) | ||||
| const int end = task.shader_x + task.shader_w; | const int end = task.shader_x + task.shader_w; | ||||
| for(int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { | for(int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { | ||||
| if(task.get_cancel()) | if(task.get_cancel()) | ||||
| break; | break; | ||||
| int shader_w = min(shader_chunk_size, end - shader_x); | int shader_w = min(shader_chunk_size, end - shader_x); | ||||
| for(int sample = 0; sample < task.num_samples; sample++) { | |||||
| /* pass in parameters */ | /* pass in parameters */ | ||||
| int offset = 0; | int offset = 0; | ||||
| cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); | cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); | ||||
| offset += sizeof(d_input); | offset += sizeof(d_input); | ||||
| cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); | cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); | ||||
| offset += sizeof(d_output); | offset += sizeof(d_output); | ||||
| int shader_eval_type = task.shader_eval_type; | int shader_eval_type = task.shader_eval_type; | ||||
| offset = align_up(offset, __alignof(shader_eval_type)); | offset = align_up(offset, __alignof(shader_eval_type)); | ||||
| cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)); | cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)); | ||||
| offset += sizeof(task.shader_eval_type); | offset += sizeof(task.shader_eval_type); | ||||
| cuda_assert(cuParamSeti(cuShader, offset, shader_x)); | cuda_assert(cuParamSeti(cuShader, offset, shader_x)); | ||||
| offset += sizeof(shader_x); | offset += sizeof(shader_x); | ||||
| cuda_assert(cuParamSeti(cuShader, offset, shader_w)); | cuda_assert(cuParamSeti(cuShader, offset, shader_w)); | ||||
| offset += sizeof(shader_w); | offset += sizeof(shader_w); | ||||
| cuda_assert(cuParamSeti(cuShader, offset, sample)); | cuda_assert(cuParamSeti(cuShader, offset, task.sample)); | ||||
| offset += sizeof(sample); | offset += sizeof(task.sample); | ||||
| cuda_assert(cuParamSetSize(cuShader, offset)); | cuda_assert(cuParamSetSize(cuShader, offset)); | ||||
| /* launch kernel */ | /* launch kernel */ | ||||
| int threads_per_block; | int threads_per_block; | ||||
| cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); | cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); | ||||
| int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; | int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; | ||||
| cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); | cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); | ||||
| cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); | cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); | ||||
| cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); | cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); | ||||
| cuda_assert(cuCtxSynchronize()); | cuda_assert(cuCtxSynchronize()); | ||||
| } | } | ||||
| } | |||||
sergey: Suggestion: Make DeviceTask::update_progress to use a pointer to render tile instead of a… | |||||
| cuda_pop_context(); | cuda_pop_context(); | ||||
| } | } | ||||
| CUdeviceptr map_pixels(device_ptr mem) | CUdeviceptr map_pixels(device_ptr mem) | ||||
| { | { | ||||
| if(!background) { | if(!background) { | ||||
| PixelMem pmem = pixel_mem_map[mem]; | PixelMem pmem = pixel_mem_map[mem]; | ||||
| CUdeviceptr buffer; | CUdeviceptr buffer; | ||||
| ▲ Show 20 Lines • Show All 329 Lines • Show Last 20 Lines | |||||
Suggestion: Make DeviceTask::update_progress take a pointer to the render tile instead of a reference, and call it from here with a NULL tile. This way you wouldn't have to worry about calling update too often.