Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_cpu.cpp
| Context not available. | |||||
| #endif | #endif | ||||
| RenderTile tile; | RenderTile tile; | ||||
| while(task.acquire_tile(this, tile)) { | void(*path_trace_function)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int); | ||||
| float *render_buffer = (float*)tile.buffer; | |||||
| uint *rng_state = (uint*)tile.rng_state; | |||||
| int start_sample = tile.start_sample; | |||||
| int end_sample = tile.start_sample + tile.num_samples; | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | ||||
| if(system_cpu_support_avx2()) { | if (system_cpu_support_avx2()) | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | path_trace_function = kernel_cpu_avx2_path_trace; | ||||
| if (task.get_cancel() || task_pool.canceled()) { | else | ||||
| if(task.need_finish_queue == false) | |||||
| break; | |||||
| } | |||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | |||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | |||||
| kernel_cpu_avx2_path_trace(&kg, render_buffer, rng_state, | |||||
| sample, x, y, tile.offset, tile.stride); | |||||
| } | |||||
| } | |||||
| tile.sample = sample + 1; | |||||
| task.update_progress(tile); | |||||
| } | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | ||||
| if(system_cpu_support_avx()) { | if (system_cpu_support_avx()) | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | path_trace_function = kernel_cpu_avx_path_trace; | ||||
| if (task.get_cancel() || task_pool.canceled()) { | else | ||||
| if(task.need_finish_queue == false) | |||||
| break; | |||||
| } | |||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | |||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | |||||
| kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state, | |||||
| sample, x, y, tile.offset, tile.stride); | |||||
| } | |||||
| } | |||||
| tile.sample = sample + 1; | |||||
| task.update_progress(tile); | |||||
| } | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | ||||
| if(system_cpu_support_sse41()) { | if (system_cpu_support_sse41()) | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | path_trace_function = kernel_cpu_sse41_path_trace; | ||||
| if (task.get_cancel() || task_pool.canceled()) { | else | ||||
| if(task.need_finish_queue == false) | |||||
| break; | |||||
| } | |||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | |||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | |||||
| kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state, | |||||
| sample, x, y, tile.offset, tile.stride); | |||||
| } | |||||
| } | |||||
| tile.sample = sample + 1; | |||||
| task.update_progress(tile); | |||||
| } | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | ||||
| if(system_cpu_support_sse3()) { | if (system_cpu_support_sse3()) | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | path_trace_function = kernel_cpu_sse3_path_trace; | ||||
| if (task.get_cancel() || task_pool.canceled()) { | else | ||||
| if(task.need_finish_queue == false) | |||||
| break; | |||||
| } | |||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | |||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | |||||
| kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, | |||||
| sample, x, y, tile.offset, tile.stride); | |||||
| } | |||||
| } | |||||
| tile.sample = sample + 1; | |||||
| task.update_progress(tile); | |||||
| } | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | ||||
| if(system_cpu_support_sse2()) { | if (system_cpu_support_sse2()) | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | path_trace_function = kernel_cpu_sse2_path_trace; | ||||
| if (task.get_cancel() || task_pool.canceled()) { | else | ||||
| if(task.need_finish_queue == false) | |||||
| break; | |||||
| } | |||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | |||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | |||||
| kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, | |||||
| sample, x, y, tile.offset, tile.stride); | |||||
| } | |||||
| } | |||||
| tile.sample = sample + 1; | |||||
| task.update_progress(tile); | |||||
| } | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| { | path_trace_function = kernel_cpu_path_trace; | ||||
| for(int sample = start_sample; sample < end_sample; sample++) { | |||||
| if (task.get_cancel() || task_pool.canceled()) { | while(task.acquire_tile(this, tile)) { | ||||
| if(task.need_finish_queue == false) | float *render_buffer = (float*)tile.buffer; | ||||
| break; | uint *rng_state = (uint*)tile.rng_state; | ||||
| } | int start_sample = tile.start_sample; | ||||
| int end_sample = tile.start_sample + tile.num_samples; | |||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | |||||
| for(int x = tile.x; x < tile.x + tile.w; x++) { | for(int sample = start_sample; sample < end_sample; sample++) { | ||||
| kernel_cpu_path_trace(&kg, render_buffer, rng_state, | if (task.get_cancel() || task_pool.canceled()) { | ||||
| sample, x, y, tile.offset, tile.stride); | if(task.need_finish_queue == false) | ||||
| } | break; | ||||
| } | |||||
| tile.sample = sample + 1; | |||||
| task.update_progress(tile); | |||||
| } | } | ||||
| for(int y = tile.y; y < tile.y + tile.h; y++) { | |||||
| for(int x = tile.x; x < tile.x + tile.w; x++) | |||||
| path_trace_function(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); | |||||
| } | |||||
| tile.sample = sample + 1; | |||||
| task.update_progress(tile); | |||||
| } | } | ||||
| task.release_tile(tile); | task.release_tile(tile); | ||||
| Context not available. | |||||
| { | { | ||||
| float sample_scale = 1.0f/(task.sample + 1); | float sample_scale = 1.0f/(task.sample + 1); | ||||
| if(task.rgba_half) { | void(*convert_to_half_float_function)(KernelGlobals*, uchar4*, float*, float, int, int, int, int); | ||||
| void(*convert_to_byte_function)(KernelGlobals*, uchar4*, float*, float, int, int, int, int); | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | ||||
| if(system_cpu_support_avx2()) { | if (system_cpu_support_avx2()) { | ||||
| for(int y = task.y; y < task.y + task.h; y++) | convert_to_half_float_function = kernel_cpu_avx2_convert_to_half_float; | ||||
| for(int x = task.x; x < task.x + task.w; x++) | convert_to_byte_function = kernel_cpu_avx2_convert_to_byte; | ||||
| kernel_cpu_avx2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, | } | ||||
| sample_scale, x, y, task.offset, task.stride); | else | ||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | ||||
| if(system_cpu_support_avx()) { | if (system_cpu_support_avx()) { | ||||
| for(int y = task.y; y < task.y + task.h; y++) | convert_to_half_float_function = kernel_cpu_avx_convert_to_half_float; | ||||
| for(int x = task.x; x < task.x + task.w; x++) | convert_to_byte_function = kernel_cpu_avx_convert_to_byte; | ||||
| kernel_cpu_avx_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, | } | ||||
| sample_scale, x, y, task.offset, task.stride); | else | ||||
| } | |||||
| else | |||||
| #endif | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | |||||
| if(system_cpu_support_sse41()) { | |||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| kernel_cpu_sse41_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else | |||||
| #endif | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | |||||
| if(system_cpu_support_sse3()) { | |||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else | |||||
| #endif | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | |||||
| if(system_cpu_support_sse2()) { | |||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| { | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | ||||
| for(int y = task.y; y < task.y + task.h; y++) | if (system_cpu_support_sse41()) { | ||||
| for(int x = task.x; x < task.x + task.w; x++) | convert_to_half_float_function = kernel_cpu_sse41_convert_to_half_float; | ||||
| kernel_cpu_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, | convert_to_byte_function = kernel_cpu_sse41_convert_to_byte; | ||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| } | } | ||||
| else { | else | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | |||||
| if(system_cpu_support_avx2()) { | |||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| kernel_cpu_avx2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | |||||
| if(system_cpu_support_avx()) { | |||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| kernel_cpu_avx_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else | |||||
| #endif | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | |||||
| if(system_cpu_support_sse41()) { | |||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| kernel_cpu_sse41_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else | |||||
| #endif | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | ||||
| if(system_cpu_support_sse3()) { | if (system_cpu_support_sse3()) { | ||||
| for(int y = task.y; y < task.y + task.h; y++) | convert_to_half_float_function = kernel_cpu_sse3_convert_to_half_float; | ||||
| for(int x = task.x; x < task.x + task.w; x++) | convert_to_byte_function = kernel_cpu_sse3_convert_to_byte; | ||||
| kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, | } | ||||
| sample_scale, x, y, task.offset, task.stride); | else | ||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | ||||
| if(system_cpu_support_sse2()) { | if (system_cpu_support_sse2()) { | ||||
| for(int y = task.y; y < task.y + task.h; y++) | convert_to_half_float_function = kernel_cpu_sse2_convert_to_half_float; | ||||
| for(int x = task.x; x < task.x + task.w; x++) | convert_to_byte_function = kernel_cpu_sse2_convert_to_byte; | ||||
| kernel_cpu_sse2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, | } | ||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else | |||||
| #endif | #endif | ||||
| { | { | ||||
| for(int y = task.y; y < task.y + task.h; y++) | convert_to_half_float_function = kernel_cpu_convert_to_half_float; | ||||
| for(int x = task.x; x < task.x + task.w; x++) | convert_to_byte_function = kernel_cpu_convert_to_byte; | ||||
| kernel_cpu_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, | } | ||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | if(task.rgba_half) { | ||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| convert_to_half_float_function(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | |||||
| else { | |||||
| for(int y = task.y; y < task.y + task.h; y++) | |||||
| for(int x = task.x; x < task.x + task.w; x++) | |||||
| convert_to_byte_function(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, | |||||
| sample_scale, x, y, task.offset, task.stride); | |||||
| } | } | ||||
| } | } | ||||
| Context not available. | |||||
| #ifdef WITH_OSL | #ifdef WITH_OSL | ||||
| OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); | OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); | ||||
| #endif | #endif | ||||
| void(*shader_function)(KernelGlobals*, uint4*, float4*, int, int, int); | |||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | ||||
| if(system_cpu_support_avx2()) { | if (system_cpu_support_avx2()) | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | shader_function = kernel_cpu_avx2_shader; | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | |||||
| kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | |||||
| if(task.get_cancel() || task_pool.canceled()) | |||||
| break; | |||||
| } | |||||
| } | |||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | ||||
| if(system_cpu_support_avx()) { | if (system_cpu_support_avx()) | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | shader_function = kernel_cpu_avx_shader; | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | |||||
| kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | |||||
| if(task.get_cancel() || task_pool.canceled()) | |||||
| break; | |||||
| } | |||||
| } | |||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | ||||
| if(system_cpu_support_sse41()) { | if (system_cpu_support_sse41()) | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | shader_function = kernel_cpu_sse41_shader; | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | |||||
| kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | |||||
| if(task.get_cancel() || task_pool.canceled()) | |||||
| break; | |||||
| } | |||||
| } | |||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | ||||
| if(system_cpu_support_sse3()) { | if (system_cpu_support_sse3()) | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | shader_function = kernel_cpu_sse3_shader; | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | |||||
| kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | |||||
| if(task.get_cancel() || task_pool.canceled()) | |||||
| break; | |||||
| } | |||||
| } | |||||
| else | else | ||||
| #endif | #endif | ||||
| #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | ||||
| if(system_cpu_support_sse2()) { | if (system_cpu_support_sse2()) | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | shader_function = kernel_cpu_sse2_shader; | ||||
| for(int sample = 0; sample < task.num_samples; sample++) | |||||
| kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | |||||
| if(task.get_cancel() || task_pool.canceled()) | |||||
| break; | |||||
| } | |||||
| } | |||||
| else | else | ||||
| #endif | #endif | ||||
| { | shader_function = kernel_cpu_shader; | ||||
| for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | |||||
| for(int sample = 0; sample < task.num_samples; sample++) | |||||
| kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | |||||
| if(task.get_cancel() || task_pool.canceled()) | for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { | ||||
| break; | for(int sample = 0; sample < task.num_samples; sample++) | ||||
| } | kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, sample); | ||||
| if(task.get_cancel() || task_pool.canceled()) | |||||
| break; | |||||
| } | } | ||||
| #ifdef WITH_OSL | #ifdef WITH_OSL | ||||
| Context not available. | |||||