Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_cpu.cpp
| Show First 20 Lines • Show All 873 Lines • ▼ Show 20 Lines | for (int y = task->rect.y; y < task->rect.w; y++) { | ||||
| task->buffer.pass_stride); | task->buffer.pass_stride); | ||||
| } | } | ||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile, int sample) | bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile, int sample) | ||||
| { | { | ||||
| WorkTile wtile; | WorkTile wtile = tile.work_tile(); | ||||
| wtile.x = tile.x; | |||||
| wtile.y = tile.y; | |||||
| wtile.w = tile.w; | |||||
| wtile.h = tile.h; | |||||
| wtile.offset = tile.offset; | |||||
| wtile.stride = tile.stride; | |||||
| wtile.buffer = (float *)tile.buffer; | |||||
| /* For CPU we do adaptive stopping per sample so we can stop earlier, but | /* For CPU we do adaptive stopping per sample so we can stop earlier, but | ||||
| * for combined CPU + GPU rendering we match the GPU and do it per tile | * for combined CPU + GPU rendering we match the GPU and do it per tile | ||||
| * after a given number of sample steps. */ | * after a given number of sample steps. */ | ||||
| if (!kernel_data.integrator.adaptive_stop_per_sample) { | if (!kernel_data.integrator.adaptive_stop_per_sample) { | ||||
| for (int y = wtile.y; y < wtile.y + wtile.h; ++y) { | for (int y = wtile.y; y < wtile.y + wtile.h; ++y) { | ||||
| for (int x = wtile.x; x < wtile.x + wtile.w; ++x) { | for (int x = wtile.x; x < wtile.x + wtile.w; ++x) { | ||||
| const int index = wtile.offset + x + y * wtile.stride; | const int index = wtile.offset + x + y * wtile.stride; | ||||
| Show All 10 Lines | bool adaptive_sampling_filter(KernelGlobals *kg, RenderTile &tile, int sample) | ||||
| for (int x = wtile.x; x < wtile.x + wtile.w; ++x) { | for (int x = wtile.x; x < wtile.x + wtile.w; ++x) { | ||||
| any |= kernel_do_adaptive_filter_y(kg, x, &wtile); | any |= kernel_do_adaptive_filter_y(kg, x, &wtile); | ||||
| } | } | ||||
| return (!any); | return (!any); | ||||
| } | } | ||||
| void adaptive_sampling_post(const RenderTile &tile, KernelGlobals *kg) | void adaptive_sampling_post(const RenderTile &tile, KernelGlobals *kg) | ||||
| { | { | ||||
| float *render_buffer = (float *)tile.buffer; | float *render_buffer = (float *)tile.get_buffer(); | ||||
| for (int y = tile.y; y < tile.y + tile.h; y++) { | for (int y = tile.get_y(); y < tile.get_y() + tile.get_h(); y++) { | ||||
| for (int x = tile.x; x < tile.x + tile.w; x++) { | for (int x = tile.get_x(); x < tile.get_x() + tile.get_w(); x++) { | ||||
| int index = tile.offset + x + y * tile.stride; | int index = tile.get_offset() + x + y * tile.get_stride(); | ||||
| ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; | ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; | ||||
| if (buffer[kernel_data.film.pass_sample_count] < 0.0f) { | if (buffer[kernel_data.film.pass_sample_count] < 0.0f) { | ||||
| buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; | buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; | ||||
| float sample_multiplier = tile.sample / max((float)tile.start_sample + 1.0f, | float sample_multiplier = tile.get_sample() / | ||||
| max((float)tile.get_start_sample() + 1.0f, | |||||
| buffer[kernel_data.film.pass_sample_count]); | buffer[kernel_data.film.pass_sample_count]); | ||||
| if (sample_multiplier != 1.0f) { | if (sample_multiplier != 1.0f) { | ||||
| kernel_adaptive_post_adjust(kg, buffer, sample_multiplier); | kernel_adaptive_post_adjust(kg, buffer, sample_multiplier); | ||||
| } | } | ||||
| } | } | ||||
| else { | else { | ||||
| kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f)); | kernel_adaptive_post_adjust(kg, buffer, tile.get_sample() / (tile.get_sample() - 1.0f)); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) | void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) | ||||
| { | { | ||||
| const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE; | const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE; | ||||
| scoped_timer timer(&tile.buffers->render_time); | scoped_timer timer(&tile.get_buffers()->get_render_time()); | ||||
| Coverage coverage(kg, tile); | Coverage coverage(kg, tile); | ||||
| if (use_coverage) { | if (use_coverage) { | ||||
| coverage.init_path_trace(); | coverage.init_path_trace(); | ||||
| } | } | ||||
| float *render_buffer = (float *)tile.buffer; | float *render_buffer = (float *)tile.get_buffer(); | ||||
| int start_sample = tile.start_sample; | int start_sample = tile.get_start_sample(); | ||||
| int end_sample = tile.start_sample + tile.num_samples; | int end_sample = tile.get_start_sample() + tile.get_num_samples(); | ||||
| /* Needed for Embree. */ | /* Needed for Embree. */ | ||||
| SIMD_SET_FLUSH_TO_ZERO; | SIMD_SET_FLUSH_TO_ZERO; | ||||
| for (int sample = start_sample; sample < end_sample; sample++) { | for (int sample = start_sample; sample < end_sample; sample++) { | ||||
| if (task.get_cancel() || task_pool.canceled()) { | if (task.get_cancel() || task_pool.canceled()) { | ||||
| if (task.need_finish_queue == false) | if (task.need_finish_queue == false) | ||||
| break; | break; | ||||
| } | } | ||||
| if (tile.stealing_state == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) { | if (tile.get_stealing_state() == RenderTile::CAN_BE_STOLEN && task.get_tile_stolen()) { | ||||
| tile.stealing_state = RenderTile::WAS_STOLEN; | tile.get_stealing_state() = RenderTile::WAS_STOLEN; | ||||
| break; | break; | ||||
| } | } | ||||
| if (tile.task == RenderTile::PATH_TRACE) { | if (tile.get_task() == RenderTile::PATH_TRACE) { | ||||
| for (int y = tile.y; y < tile.y + tile.h; y++) { | for (int y = tile.get_y(); y < tile.get_y() + tile.get_h(); y++) { | ||||
| for (int x = tile.x; x < tile.x + tile.w; x++) { | for (int x = tile.get_x(); x < tile.get_x() + tile.get_w(); x++) { | ||||
| if (use_coverage) { | if (use_coverage) { | ||||
| coverage.init_pixel(x, y); | coverage.init_pixel(x, y); | ||||
| } | } | ||||
| path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); | path_trace_kernel()( | ||||
| kg, render_buffer, sample, x, y, tile.get_offset(), tile.get_stride()); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| else { | else { | ||||
| for (int y = tile.y; y < tile.y + tile.h; y++) { | for (int y = tile.get_y(); y < tile.get_y() + tile.get_h(); y++) { | ||||
| for (int x = tile.x; x < tile.x + tile.w; x++) { | for (int x = tile.get_x(); x < tile.get_x() + tile.get_w(); x++) { | ||||
| bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); | bake_kernel()(kg, render_buffer, sample, x, y, tile.get_offset(), tile.get_stride()); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| tile.sample = sample + 1; | tile.get_sample() = sample + 1; | ||||
| if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) { | if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(sample)) { | ||||
| const bool stop = adaptive_sampling_filter(kg, tile, sample); | const bool stop = adaptive_sampling_filter(kg, tile, sample); | ||||
| if (stop) { | if (stop) { | ||||
| const int num_progress_samples = end_sample - sample; | const int num_progress_samples = end_sample - sample; | ||||
| tile.sample = end_sample; | tile.get_sample() = end_sample; | ||||
| task.update_progress(&tile, tile.w * tile.h * num_progress_samples); | task.update_progress(&tile, tile.get_w() * tile.get_h() * num_progress_samples); | ||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| task.update_progress(&tile, tile.w * tile.h); | task.update_progress(&tile, tile.get_w() * tile.get_h()); | ||||
| } | } | ||||
| if (use_coverage) { | if (use_coverage) { | ||||
| coverage.finalize(); | coverage.finalize(); | ||||
| } | } | ||||
| if (task.adaptive_sampling.use) { | if (task.adaptive_sampling.use) { | ||||
| adaptive_sampling_post(tile, kg); | adaptive_sampling_post(tile, kg); | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | #else | ||||
| (void)scale; | (void)scale; | ||||
| #endif | #endif | ||||
| } | } | ||||
| void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile) | void denoise_openimagedenoise(DeviceTask &task, RenderTile &rtile) | ||||
| { | { | ||||
| if (task.type == DeviceTask::DENOISE_BUFFER) { | if (task.type == DeviceTask::DENOISE_BUFFER) { | ||||
| /* Copy pixels from compute device to CPU (no-op for CPU device). */ | /* Copy pixels from compute device to CPU (no-op for CPU device). */ | ||||
| rtile.buffers->buffer.copy_from_device(); | rtile.get_buffers()->get_buffer().copy_from_device(); | ||||
| denoise_openimagedenoise_buffer(task, | denoise_openimagedenoise_buffer(task, | ||||
| (float *)rtile.buffer, | (float *)rtile.get_buffer(), | ||||
| rtile.offset, | rtile.get_offset(), | ||||
| rtile.stride, | rtile.get_stride(), | ||||
| rtile.x, | rtile.get_x(), | ||||
| rtile.y, | rtile.get_y(), | ||||
| rtile.w, | rtile.get_w(), | ||||
| rtile.h, | rtile.get_h(), | ||||
| 1.0f / rtile.sample); | 1.0f / rtile.get_sample()); | ||||
| /* todo: it may be possible to avoid this copy, but we have to ensure that | /* todo: it may be possible to avoid this copy, but we have to ensure that | ||||
| * when other code copies data from the device it doesn't overwrite the | * when other code copies data from the device it doesn't overwrite the | ||||
| * denoiser buffers. */ | * denoiser buffers. */ | ||||
| rtile.buffers->buffer.copy_to_device(); | rtile.get_buffers()->get_buffer().copy_to_device(); | ||||
| } | } | ||||
| else { | else { | ||||
| /* Per-tile denoising. */ | /* Per-tile denoising. */ | ||||
| rtile.sample = rtile.start_sample + rtile.num_samples; | rtile.get_sample() = rtile.get_start_sample() + rtile.get_num_samples(); | ||||
| const float scale = 1.0f / rtile.sample; | const float scale = 1.0f / rtile.get_sample(); | ||||
| const float invscale = rtile.sample; | const float invscale = rtile.get_sample(); | ||||
| const size_t pass_stride = task.pass_stride; | const size_t pass_stride = task.pass_stride; | ||||
| /* Map neighboring tiles into one buffer for denoising. */ | /* Map neighboring tiles into one buffer for denoising. */ | ||||
| RenderTileNeighbors neighbors(rtile); | RenderTileNeighbors neighbors(rtile); | ||||
| task.map_neighbor_tiles(neighbors, this); | task.map_neighbor_tiles(neighbors, this); | ||||
| RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; | RenderTile &center_tile = neighbors.get_tiles()[RenderTileNeighbors::CENTER]; | ||||
| rtile = center_tile; | rtile = center_tile; | ||||
| /* Calculate size of the tile to denoise (including overlap). The overlap | /* Calculate size of the tile to denoise (including overlap). The overlap | ||||
| * size was chosen empirically. OpenImageDenoise specifies an overlap size | * size was chosen empirically. OpenImageDenoise specifies an overlap size | ||||
| * of 128 but this is significantly bigger than typical tile size. */ | * of 128 but this is significantly bigger than typical tile size. */ | ||||
| const int4 rect = rect_clip(rect_expand(center_tile.bounds(), 64), neighbors.bounds()); | const int4 rect = rect_clip(rect_expand(center_tile.bounds(), 64), neighbors.bounds()); | ||||
| const int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y); | const int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y); | ||||
| /* Adjacent tiles are in separate memory regions, copy into single buffer. */ | /* Adjacent tiles are in separate memory regions, copy into single buffer. */ | ||||
| array<float> merged(rect_size.x * rect_size.y * task.pass_stride); | array<float> merged(rect_size.x * rect_size.y * task.pass_stride); | ||||
| for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { | foreach (RenderTile &ntile, neighbors.get_tiles()) { | ||||
| RenderTile &ntile = neighbors.tiles[i]; | if (!ntile.get_buffer()) { | ||||
| if (!ntile.buffer) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| const int xmin = max(ntile.x, rect.x); | const int xmin = max(ntile.get_x(), rect.x); | ||||
| const int ymin = max(ntile.y, rect.y); | const int ymin = max(ntile.get_y(), rect.y); | ||||
| const int xmax = min(ntile.x + ntile.w, rect.z); | const int xmax = min(ntile.get_x() + ntile.get_w(), rect.z); | ||||
| const int ymax = min(ntile.y + ntile.h, rect.w); | const int ymax = min(ntile.get_y() + ntile.get_h(), rect.w); | ||||
| const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride; | const size_t tile_offset = ntile.get_offset() + xmin + ymin * ntile.get_stride(); | ||||
| const float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride; | const float *tile_buffer = (float *)ntile.get_buffer() + tile_offset * pass_stride; | ||||
| const size_t merged_stride = rect_size.x; | const size_t merged_stride = rect_size.x; | ||||
| const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride; | const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride; | ||||
| float *merged_buffer = merged.data() + merged_offset * pass_stride; | float *merged_buffer = merged.data() + merged_offset * pass_stride; | ||||
| for (int y = ymin; y < ymax; y++) { | for (int y = ymin; y < ymax; y++) { | ||||
| for (int x = 0; x < pass_stride * (xmax - xmin); x++) { | for (int x = 0; x < pass_stride * (xmax - xmin); x++) { | ||||
| merged_buffer[x] = tile_buffer[x] * scale; | merged_buffer[x] = tile_buffer[x] * scale; | ||||
| } | } | ||||
| tile_buffer += ntile.stride * pass_stride; | tile_buffer += ntile.get_stride() * pass_stride; | ||||
| merged_buffer += merged_stride * pass_stride; | merged_buffer += merged_stride * pass_stride; | ||||
| } | } | ||||
| } | } | ||||
| /* Denoise */ | /* Denoise */ | ||||
| denoise_openimagedenoise_buffer( | denoise_openimagedenoise_buffer( | ||||
| task, merged.data(), 0, rect_size.x, 0, 0, rect_size.x, rect_size.y, 1.0f); | task, merged.data(), 0, rect_size.x, 0, 0, rect_size.x, rect_size.y, 1.0f); | ||||
| /* Copy back result from merged buffer. */ | /* Copy back result from merged buffer. */ | ||||
| RenderTile &ntile = neighbors.target; | RenderTile &ntile = neighbors.get_target(); | ||||
| if (ntile.buffer) { | if (ntile.get_buffer()) { | ||||
| const int xmin = max(ntile.x, rect.x); | const int xmin = max(ntile.get_x(), rect.x); | ||||
| const int ymin = max(ntile.y, rect.y); | const int ymin = max(ntile.get_y(), rect.y); | ||||
| const int xmax = min(ntile.x + ntile.w, rect.z); | const int xmax = min(ntile.get_x() + ntile.get_w(), rect.z); | ||||
| const int ymax = min(ntile.y + ntile.h, rect.w); | const int ymax = min(ntile.get_y() + ntile.get_h(), rect.w); | ||||
| const size_t tile_offset = ntile.offset + xmin + ymin * ntile.stride; | const size_t tile_offset = ntile.get_offset() + xmin + ymin * ntile.get_stride(); | ||||
| float *tile_buffer = (float *)ntile.buffer + tile_offset * pass_stride; | float *tile_buffer = (float *)ntile.get_buffer() + tile_offset * pass_stride; | ||||
| const size_t merged_stride = rect_size.x; | const size_t merged_stride = rect_size.x; | ||||
| const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride; | const size_t merged_offset = (xmin - rect.x) + (ymin - rect.y) * merged_stride; | ||||
| const float *merged_buffer = merged.data() + merged_offset * pass_stride; | const float *merged_buffer = merged.data() + merged_offset * pass_stride; | ||||
| for (int y = ymin; y < ymax; y++) { | for (int y = ymin; y < ymax; y++) { | ||||
| for (int x = 0; x < pass_stride * (xmax - xmin); x += pass_stride) { | for (int x = 0; x < pass_stride * (xmax - xmin); x += pass_stride) { | ||||
| tile_buffer[x + 0] = merged_buffer[x + 0] * invscale; | tile_buffer[x + 0] = merged_buffer[x + 0] * invscale; | ||||
| tile_buffer[x + 1] = merged_buffer[x + 1] * invscale; | tile_buffer[x + 1] = merged_buffer[x + 1] * invscale; | ||||
| tile_buffer[x + 2] = merged_buffer[x + 2] * invscale; | tile_buffer[x + 2] = merged_buffer[x + 2] * invscale; | ||||
| } | } | ||||
| tile_buffer += ntile.stride * pass_stride; | tile_buffer += ntile.get_stride() * pass_stride; | ||||
| merged_buffer += merged_stride * pass_stride; | merged_buffer += merged_stride * pass_stride; | ||||
| } | } | ||||
| } | } | ||||
| task.unmap_neighbor_tiles(neighbors, this); | task.unmap_neighbor_tiles(neighbors, this); | ||||
| } | } | ||||
| } | } | ||||
| void denoise_nlm(DenoisingTask &denoising, RenderTile &tile) | void denoise_nlm(DenoisingTask &denoising, RenderTile &tile) | ||||
| { | { | ||||
| ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING); | ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING); | ||||
| tile.sample = tile.start_sample + tile.num_samples; | tile.get_sample() = tile.get_start_sample() + tile.get_num_samples(); | ||||
| denoising.functions.construct_transform = function_bind( | denoising.functions.construct_transform = function_bind( | ||||
| &CPUDevice::denoising_construct_transform, this, &denoising); | &CPUDevice::denoising_construct_transform, this, &denoising); | ||||
| denoising.functions.accumulate = function_bind( | denoising.functions.accumulate = function_bind( | ||||
| &CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising); | &CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising); | ||||
| denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising); | denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising); | ||||
| denoising.functions.divide_shadow = function_bind( | denoising.functions.divide_shadow = function_bind( | ||||
| &CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); | &CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); | ||||
| denoising.functions.non_local_means = function_bind( | denoising.functions.non_local_means = function_bind( | ||||
| &CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); | &CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); | ||||
| denoising.functions.combine_halves = function_bind( | denoising.functions.combine_halves = function_bind( | ||||
| &CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); | &CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); | ||||
| denoising.functions.get_feature = function_bind( | denoising.functions.get_feature = function_bind( | ||||
| &CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising); | &CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising); | ||||
| denoising.functions.write_feature = function_bind( | denoising.functions.write_feature = function_bind( | ||||
| &CPUDevice::denoising_write_feature, this, _1, _2, _3, &denoising); | &CPUDevice::denoising_write_feature, this, _1, _2, _3, &denoising); | ||||
| denoising.functions.detect_outliers = function_bind( | denoising.functions.detect_outliers = function_bind( | ||||
| &CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); | &CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); | ||||
| denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); | denoising.filter_area = make_int4(tile.get_x(), tile.get_y(), tile.get_w(), tile.get_h()); | ||||
| denoising.render_buffer.samples = tile.sample; | denoising.render_buffer.samples = tile.get_sample(); | ||||
| denoising.buffer.gpu_temporary_mem = false; | denoising.buffer.gpu_temporary_mem = false; | ||||
| denoising.run_denoising(tile); | denoising.run_denoising(tile); | ||||
| } | } | ||||
| void thread_render(DeviceTask &task) | void thread_render(DeviceTask &task) | ||||
| { | { | ||||
| if (task_pool.canceled()) { | if (task_pool.canceled()) { | ||||
| Show All 33 Lines | if ((tile_types & RenderTile::DENOISE) && task.denoising.type == DENOISER_OPENIMAGEDENOISE) { | ||||
| if (!oidn_task_lock.try_lock()) { | if (!oidn_task_lock.try_lock()) { | ||||
| tile_types &= ~RenderTile::DENOISE; | tile_types &= ~RenderTile::DENOISE; | ||||
| hold_denoise_lock = true; | hold_denoise_lock = true; | ||||
| } | } | ||||
| } | } | ||||
| RenderTile tile; | RenderTile tile; | ||||
| while (task.acquire_tile(this, tile, tile_types)) { | while (task.acquire_tile(this, tile, tile_types)) { | ||||
| if (tile.task == RenderTile::PATH_TRACE) { | if (tile.get_task() == RenderTile::PATH_TRACE) { | ||||
| if (use_split_kernel) { | if (use_split_kernel) { | ||||
| device_only_memory<uchar> void_buffer(this, "void_buffer"); | device_only_memory<uchar> void_buffer(this, "void_buffer"); | ||||
| split_kernel->path_trace(task, tile, kgbuffer, void_buffer); | split_kernel->path_trace(task, tile, kgbuffer, void_buffer); | ||||
| } | } | ||||
| else { | else { | ||||
| render(task, tile, kg); | render(task, tile, kg); | ||||
| } | } | ||||
| } | } | ||||
| else if (tile.task == RenderTile::BAKE) { | else if (tile.get_task() == RenderTile::BAKE) { | ||||
| render(task, tile, kg); | render(task, tile, kg); | ||||
| } | } | ||||
| else if (tile.task == RenderTile::DENOISE) { | else if (tile.get_task() == RenderTile::DENOISE) { | ||||
| if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) { | if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) { | ||||
| denoise_openimagedenoise(task, tile); | denoise_openimagedenoise(task, tile); | ||||
| } | } | ||||
| else if (task.denoising.type == DENOISER_NLM) { | else if (task.denoising.type == DENOISER_NLM) { | ||||
| if (denoising == NULL) { | if (denoising == NULL) { | ||||
| denoising = new DenoisingTask(this, task); | denoising = new DenoisingTask(this, task); | ||||
| denoising->profiler = &kg->profiler; | denoising->profiler = &kg->profiler; | ||||
| } | } | ||||
| denoise_nlm(*denoising, tile); | denoise_nlm(*denoising, tile); | ||||
| } | } | ||||
| task.update_progress(&tile, tile.w * tile.h); | task.update_progress(&tile, tile.get_w() * tile.get_h()); | ||||
| } | } | ||||
| task.release_tile(tile); | task.release_tile(tile); | ||||
| if (task_pool.canceled()) { | if (task_pool.canceled()) { | ||||
| if (task.need_finish_queue == false) | if (task.need_finish_queue == false) | ||||
| break; | break; | ||||
| } | } | ||||
| Show All 9 Lines | void thread_render(DeviceTask &task) | ||||
| kg->~KernelGlobals(); | kg->~KernelGlobals(); | ||||
| kgbuffer.free(); | kgbuffer.free(); | ||||
| delete split_kernel; | delete split_kernel; | ||||
| delete denoising; | delete denoising; | ||||
| } | } | ||||
| void thread_denoise(DeviceTask &task) | void thread_denoise(DeviceTask &task) | ||||
| { | { | ||||
| RenderTile tile; | RenderTile tile = RenderTile::from_device_task(task, true); | ||||
| tile.x = task.x; | |||||
| tile.y = task.y; | |||||
| tile.w = task.w; | |||||
| tile.h = task.h; | |||||
| tile.buffer = task.buffer; | |||||
| tile.sample = task.sample + task.num_samples; | |||||
| tile.num_samples = task.num_samples; | |||||
| tile.start_sample = task.sample; | |||||
| tile.offset = task.offset; | |||||
| tile.stride = task.stride; | |||||
| tile.buffers = task.buffers; | |||||
| if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) { | if (task.denoising.type == DENOISER_OPENIMAGEDENOISE) { | ||||
| denoise_openimagedenoise(task, tile); | denoise_openimagedenoise(task, tile); | ||||
| } | } | ||||
| else { | else { | ||||
| DenoisingTask denoising(this, task); | DenoisingTask denoising(this, task); | ||||
| ProfilingState denoising_profiler_state; | ProfilingState denoising_profiler_state; | ||||
| profiler.add_state(&denoising_profiler_state); | profiler.add_state(&denoising_profiler_state); | ||||
| denoising.profiler = &denoising_profiler_state; | denoising.profiler = &denoising_profiler_state; | ||||
| denoise_nlm(denoising, tile); | denoise_nlm(denoising, tile); | ||||
| profiler.remove_state(&denoising_profiler_state); | profiler.remove_state(&denoising_profiler_state); | ||||
| } | } | ||||
| task.update_progress(&tile, tile.w * tile.h); | task.update_progress(&tile, tile.get_w() * tile.get_h()); | ||||
| } | } | ||||
| void thread_film_convert(DeviceTask &task) | void thread_film_convert(DeviceTask &task) | ||||
| { | { | ||||
| float sample_scale = 1.0f / (task.sample + 1); | float sample_scale = 1.0f / (task.sample + 1); | ||||
| if (task.rgba_half) { | if (task.rgba_half) { | ||||
| for (int y = task.y; y < task.y + task.h; y++) | for (int y = task.y; y < task.y + task.h; y++) | ||||
| ▲ Show 20 Lines • Show All 198 Lines • ▼ Show 20 Lines | for (int y = 0; y < dim.global_size[1]; y++) { | ||||
| for (int x = 0; x < dim.global_size[0]; x++) { | for (int x = 0; x < dim.global_size[0]; x++) { | ||||
| kg->global_id = make_int2(x, y); | kg->global_id = make_int2(x, y); | ||||
| device->data_init_kernel()((KernelGlobals *)kernel_globals.device_pointer, | device->data_init_kernel()((KernelGlobals *)kernel_globals.device_pointer, | ||||
| (KernelData *)data.device_pointer, | (KernelData *)data.device_pointer, | ||||
| (void *)split_data.device_pointer, | (void *)split_data.device_pointer, | ||||
| num_global_elements, | num_global_elements, | ||||
| (char *)ray_state.device_pointer, | (char *)ray_state.device_pointer, | ||||
| rtile.start_sample, | rtile.get_start_sample(), | ||||
| rtile.start_sample + rtile.num_samples, | rtile.get_start_sample() + rtile.get_num_samples(), | ||||
| rtile.x, | rtile.get_x(), | ||||
| rtile.y, | rtile.get_y(), | ||||
| rtile.w, | rtile.get_w(), | ||||
| rtile.h, | rtile.get_h(), | ||||
| rtile.offset, | rtile.get_offset(), | ||||
| rtile.stride, | rtile.get_stride(), | ||||
| (int *)queue_index.device_pointer, | (int *)queue_index.device_pointer, | ||||
| dim.global_size[0] * dim.global_size[1], | dim.global_size[0] * dim.global_size[1], | ||||
| (char *)use_queues_flags.device_pointer, | (char *)use_queues_flags.device_pointer, | ||||
| (uint *)work_pool_wgs.device_pointer, | (uint *)work_pool_wgs.device_pointer, | ||||
| rtile.num_samples, | rtile.get_num_samples(), | ||||
| (float *)rtile.buffer); | (float *)rtile.get_buffer()); | ||||
| } | } | ||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| SplitKernelFunction *CPUSplitKernel::get_split_kernel_function(const string &kernel_name, | SplitKernelFunction *CPUSplitKernel::get_split_kernel_function(const string &kernel_name, | ||||
| const DeviceRequestedFeatures &) | const DeviceRequestedFeatures &) | ||||
| ▲ Show 20 Lines • Show All 73 Lines • Show Last 20 Lines | |||||