Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_cpu.cpp
| Show First 20 Lines • Show All 179 Lines • ▼ Show 20 Lines | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | ||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | for(int x = tile.x; x < tile.x + tile.w; x++) { | ||||
| kernel_cpu_avx2_path_trace(&kg, render_buffer, rng_state, | kernel_cpu_avx2_path_trace(&kg, render_buffer, rng_state, | ||||
| sample, x, y, tile.offset, tile.stride); | sample, x, y, tile.offset, tile.stride); | ||||
| } | } | ||||
| } | } | ||||
| tile.sample = sample + 1; | tile.sample = sample + 1; | ||||
| task.update_progress(tile); | task.update_progress(&tile); | ||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | ||||
| if(system_cpu_support_avx()) { | if(system_cpu_support_avx()) { | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | for(int sample = start_sample; sample < end_sample; sample++) { | ||||
| if (task.get_cancel() || task_pool.canceled()) { | if (task.get_cancel() || task_pool.canceled()) { | ||||
| if(task.need_finish_queue == false) | if(task.need_finish_queue == false) | ||||
| break; | break; | ||||
| } | } | ||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | for(int y = tile.y; y < tile.y + tile.h; y++) { | ||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | for(int x = tile.x; x < tile.x + tile.w; x++) { | ||||
| kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state, | kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state, | ||||
| sample, x, y, tile.offset, tile.stride); | sample, x, y, tile.offset, tile.stride); | ||||
| } | } | ||||
| } | } | ||||
| tile.sample = sample + 1; | tile.sample = sample + 1; | ||||
| task.update_progress(tile); | task.update_progress(&tile); | ||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | ||||
| if(system_cpu_support_sse41()) { | if(system_cpu_support_sse41()) { | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | for(int sample = start_sample; sample < end_sample; sample++) { | ||||
| if (task.get_cancel() || task_pool.canceled()) { | if (task.get_cancel() || task_pool.canceled()) { | ||||
| if(task.need_finish_queue == false) | if(task.need_finish_queue == false) | ||||
| break; | break; | ||||
| } | } | ||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | for(int y = tile.y; y < tile.y + tile.h; y++) { | ||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | for(int x = tile.x; x < tile.x + tile.w; x++) { | ||||
| kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state, | kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state, | ||||
| sample, x, y, tile.offset, tile.stride); | sample, x, y, tile.offset, tile.stride); | ||||
| } | } | ||||
| } | } | ||||
| tile.sample = sample + 1; | tile.sample = sample + 1; | ||||
| task.update_progress(tile); | task.update_progress(&tile); | ||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | ||||
| if(system_cpu_support_sse3()) { | if(system_cpu_support_sse3()) { | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | for(int sample = start_sample; sample < end_sample; sample++) { | ||||
| if (task.get_cancel() || task_pool.canceled()) { | if (task.get_cancel() || task_pool.canceled()) { | ||||
| if(task.need_finish_queue == false) | if(task.need_finish_queue == false) | ||||
| break; | break; | ||||
| } | } | ||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | for(int y = tile.y; y < tile.y + tile.h; y++) { | ||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | for(int x = tile.x; x < tile.x + tile.w; x++) { | ||||
| kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, | kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, | ||||
| sample, x, y, tile.offset, tile.stride); | sample, x, y, tile.offset, tile.stride); | ||||
| } | } | ||||
| } | } | ||||
| tile.sample = sample + 1; | tile.sample = sample + 1; | ||||
| task.update_progress(tile); | task.update_progress(&tile); | ||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | ||||
| if(system_cpu_support_sse2()) { | if(system_cpu_support_sse2()) { | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | for(int sample = start_sample; sample < end_sample; sample++) { | ||||
| if (task.get_cancel() || task_pool.canceled()) { | if (task.get_cancel() || task_pool.canceled()) { | ||||
| if(task.need_finish_queue == false) | if(task.need_finish_queue == false) | ||||
| break; | break; | ||||
| } | } | ||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | for(int y = tile.y; y < tile.y + tile.h; y++) { | ||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | for(int x = tile.x; x < tile.x + tile.w; x++) { | ||||
| kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, | kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, | ||||
| sample, x, y, tile.offset, tile.stride); | sample, x, y, tile.offset, tile.stride); | ||||
| } | } | ||||
| } | } | ||||
| tile.sample = sample + 1; | tile.sample = sample + 1; | ||||
| task.update_progress(tile); | task.update_progress(&tile); | ||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| { | { | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | for(int sample = start_sample; sample < end_sample; sample++) { | ||||
| if (task.get_cancel() || task_pool.canceled()) { | if (task.get_cancel() || task_pool.canceled()) { | ||||
| if(task.need_finish_queue == false) | if(task.need_finish_queue == false) | ||||
| break; | break; | ||||
| } | } | ||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | for(int y = tile.y; y < tile.y + tile.h; y++) { | ||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | for(int x = tile.x; x < tile.x + tile.w; x++) { | ||||
| kernel_cpu_path_trace(&kg, render_buffer, rng_state, | kernel_cpu_path_trace(&kg, render_buffer, rng_state, | ||||
| sample, x, y, tile.offset, tile.stride); | sample, x, y, tile.offset, tile.stride); | ||||
| } | } | ||||
| } | } | ||||
| tile.sample = sample + 1; | tile.sample = sample + 1; | ||||
| task.update_progress(tile); | task.update_progress(&tile); | ||||
| } | } | ||||
| } | } | ||||
| task.release_tile(tile); | task.release_tile(tile); | ||||
| if(task_pool.canceled()) { | if(task_pool.canceled()) { | ||||
| if(task.need_finish_queue == false) | if(task.need_finish_queue == false) | ||||
| break; | break; | ||||
| ▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines | void thread_shader(DeviceTask& task) | ||||
| KernelGlobals kg = kernel_globals; | KernelGlobals kg = kernel_globals; | ||||
| #ifdef WITH_OSL | #ifdef WITH_OSL | ||||
| OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); | OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | ||||
| if(system_cpu_support_avx2()) { | if(system_cpu_support_avx2()) { | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) | ||||
| kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | ||||
| if(task.get_cancel() || task_pool.canceled()) | if(task.get_cancel() || task_pool.canceled()) | ||||
| break; | break; | ||||
| task.update_progress(NULL); | |||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | ||||
| if(system_cpu_support_avx()) { | if(system_cpu_support_avx()) { | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) | ||||
| kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | ||||
| if(task.get_cancel() || task_pool.canceled()) | if(task.get_cancel() || task_pool.canceled()) | ||||
| break; | break; | ||||
| task.update_progress(NULL); | |||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | ||||
| if(system_cpu_support_sse41()) { | if(system_cpu_support_sse41()) { | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) | ||||
| kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | ||||
| if(task.get_cancel() || task_pool.canceled()) | if(task.get_cancel() || task_pool.canceled()) | ||||
| break; | break; | ||||
| task.update_progress(NULL); | |||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | ||||
| if(system_cpu_support_sse3()) { | if(system_cpu_support_sse3()) { | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) | ||||
| kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | ||||
| if(task.get_cancel() || task_pool.canceled()) | if(task.get_cancel() || task_pool.canceled()) | ||||
| break; | break; | ||||
| task.update_progress(NULL); | |||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | ||||
| if(system_cpu_support_sse2()) { | if(system_cpu_support_sse2()) { | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) | ||||
| kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | ||||
| if(task.get_cancel() || task_pool.canceled()) | if(task.get_cancel() || task_pool.canceled()) | ||||
| break; | break; | ||||
| task.update_progress(NULL); | |||||
| } | } | ||||
| } | } | ||||
| else | else | ||||
| #endif | #endif | ||||
| { | { | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | for(int sample = 0; sample < task.num_samples; sample++) { | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) | ||||
| kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | ||||
| if(task.get_cancel() || task_pool.canceled()) | if(task.get_cancel() || task_pool.canceled()) | ||||
| break; | break; | ||||
| task.update_progress(NULL); | |||||
| } | } | ||||
| } | } | ||||
| #ifdef WITH_OSL | #ifdef WITH_OSL | ||||
| OSLShader::thread_free(&kg); | OSLShader::thread_free(&kg); | ||||
| #endif | #endif | ||||
| } | } | ||||
| int get_split_task_count(DeviceTask& task) | |||||
| { | |||||
| if (task.type == DeviceTask::SHADER) | |||||
| return task.get_subtask_count(TaskScheduler::num_threads(), 256); | |||||
| else | |||||
| return task.get_subtask_count(TaskScheduler::num_threads()); | |||||
| } | |||||
| void task_add(DeviceTask& task) | void task_add(DeviceTask& task) | ||||
| { | { | ||||
| /* split task into smaller ones */ | /* split task into smaller ones */ | ||||
| list<DeviceTask> tasks; | list<DeviceTask> tasks; | ||||
| if(task.type == DeviceTask::SHADER) | if(task.type == DeviceTask::SHADER) | ||||
| task.split(tasks, TaskScheduler::num_threads(), 256); | task.split(tasks, TaskScheduler::num_threads(), 256); | ||||
| else | else | ||||
| Show All 38 Lines | |||||