Changeset View
Standalone View
intern/cycles/render/bake.cpp
| Show First 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | return make_uint4( | ||||
| ); | ); | ||||
| } | } | ||||
| BakeManager::BakeManager() | BakeManager::BakeManager() | ||||
| { | { | ||||
| m_bake_data = NULL; | m_bake_data = NULL; | ||||
| m_is_baking = false; | m_is_baking = false; | ||||
| need_update = true; | need_update = true; | ||||
| m_shader_limit = 512 * 512; | |||||
| } | } | ||||
| BakeManager::~BakeManager() | BakeManager::~BakeManager() | ||||
| { | { | ||||
| if(m_bake_data) | if(m_bake_data) | ||||
| delete m_bake_data; | delete m_bake_data; | ||||
| } | } | ||||
| bool BakeManager::get_baking() | bool BakeManager::get_baking() | ||||
| { | { | ||||
| return m_is_baking; | return m_is_baking; | ||||
| } | } | ||||
| void BakeManager::set_baking(const bool value) | void BakeManager::set_baking(const bool value) | ||||
| { | { | ||||
| m_is_baking = value; | m_is_baking = value; | ||||
| } | } | ||||
| BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels) | BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels) | ||||
| { | { | ||||
| m_bake_data = new BakeData(object, tri_offset, num_pixels); | m_bake_data = new BakeData(object, tri_offset, num_pixels); | ||||
| return m_bake_data; | return m_bake_data; | ||||
| } | } | ||||
| void BakeManager::set_shader_limit(const size_t x, const size_t y) | |||||
| { | |||||
| m_shader_limit = x * y; | |||||
| m_shader_limit = pow(2, ceil(log(m_shader_limit)/log(2))); | |||||
| } | |||||
| bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]) | bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]) | ||||
| { | { | ||||
| size_t limit = bake_data->size(); | size_t num_pixels = bake_data->size(); | ||||
sergey: A bit arbitrary it seems, where are the numbers came from?
Just throwing ideas:
- Use tile… | |||||
Not Done Inline ActionsThat was just to confirm that 3k * 3k would render while 4k * 4k would not. To use the size from somewhere else is the way to go indeed. dfelinto: That was just to confirm that 3k * 3k would render while 4k * 4k would not. To use the size… | |||||
| progress.reset_sample(); | |||||
| this->num_parts = 0; | |||||
Not Done Inline ActionsSpaces around operators, same applies to some cases below. Also, it's nice readability currently, but seems you need to indent the whole cycle body. sergey: Spaces around operators, same applies to some cases below.
Also, it's nice readability… | |||||
Not Done Inline Actionsit is indented, but phabricator doesn't show those code changes. dfelinto: it is indented, but phabricator doesn't show those code changes. | |||||
| /* calculate the total parts for the progress bar */ | |||||
| for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { | |||||
| size_t shader_size = fminf(num_pixels - shader_offset, m_shader_limit); | |||||
| DeviceTask task(DeviceTask::SHADER); | |||||
| task.shader_w = shader_size; | |||||
| this->num_parts += device->get_split_task_count(task); | |||||
| } | |||||
Not Done Inline ActionsWhy not to keep it where it used to be and avoid having rather obscure cycle? Don't really think you'll notice non-linearity in the progress. sergey: Why not to keep it where it used to be and avoid having rather obscure cycle? Don't really… | |||||
| this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1; | |||||
| for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { | |||||
| size_t shader_size = fminf(num_pixels - shader_offset, m_shader_limit); | |||||
| /* setup input for device task */ | /* setup input for device task */ | ||||
| device_vector<uint4> d_input; | device_vector<uint4> d_input; | ||||
| uint4 *d_input_data = d_input.resize(limit * 2); | uint4 *d_input_data = d_input.resize(shader_size * 2); | ||||
| size_t d_input_size = 0; | size_t d_input_size = 0; | ||||
| for(size_t i = 0; i < limit; i++) { | for(size_t i = shader_offset; i < (shader_offset + shader_size); i++) { | ||||
| d_input_data[d_input_size++] = bake_data->data(i); | d_input_data[d_input_size++] = bake_data->data(i); | ||||
| d_input_data[d_input_size++] = bake_data->differentials(i); | d_input_data[d_input_size++] = bake_data->differentials(i); | ||||
| } | } | ||||
| if(d_input_size == 0) | if(d_input_size == 0) { | ||||
| m_is_baking = false; | |||||
| return false; | return false; | ||||
| } | |||||
| /* run device task */ | /* run device task */ | ||||
| device_vector<float4> d_output; | device_vector<float4> d_output; | ||||
| d_output.resize(limit); | d_output.resize(shader_size); | ||||
| /* needs to be up to data for attribute access */ | /* needs to be up to data for attribute access */ | ||||
| device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); | device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); | ||||
| device->mem_alloc(d_input, MEM_READ_ONLY); | device->mem_alloc(d_input, MEM_READ_ONLY); | ||||
| device->mem_copy_to(d_input); | device->mem_copy_to(d_input); | ||||
| device->mem_alloc(d_output, MEM_WRITE_ONLY); | device->mem_alloc(d_output, MEM_WRITE_ONLY); | ||||
| DeviceTask task(DeviceTask::SHADER); | DeviceTask task(DeviceTask::SHADER); | ||||
| task.shader_input = d_input.device_pointer; | task.shader_input = d_input.device_pointer; | ||||
| task.shader_output = d_output.device_pointer; | task.shader_output = d_output.device_pointer; | ||||
| task.shader_eval_type = shader_type; | task.shader_eval_type = shader_type; | ||||
| task.shader_x = 0; | task.shader_x = 0; | ||||
| task.shader_w = d_output.size(); | task.shader_w = d_output.size(); | ||||
| task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1; | task.num_samples = this->num_samples; | ||||
| task.get_cancel = function_bind(&Progress::get_cancel, &progress); | task.get_cancel = function_bind(&Progress::get_cancel, &progress); | ||||
| task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); | task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); | ||||
Not Done Inline ActionsDoes it mean progress bar will go from 0 to 1 for every "tile" ? sergey: Does it mean progress bar will go from 0 to 1 for every "tile" ? | |||||
Not Done Inline ActionsYes it does, and it's the main issue to be handled before this patch is to be considered for real. One thing that could work is to get the total count (all shader parts times their respective num_tasks) before starting the loop. dfelinto: Yes it does, and it's the main issue to be handled before this patch is to be considered for… | |||||
Not Done Inline ActionsWould it work if we implement splitting in the add_task for shader jobs in CUDA device? sergey: Would it work if we implement splitting in the add_task for shader jobs in CUDA device? | |||||
Not Done Inline ActionsThat would be the ideal solution. This patch was actually to illustrate that. Though since I have no CUDA at hand (nor I'm very acknowledged in CUDA coding) I took this proof of concept approach. dfelinto: That would be the ideal solution. This patch was actually to illustrate that. Though since I… | |||||
| this->num_parts = device->get_split_task_count(task); | |||||
| this->num_samples = task.num_samples; | |||||
| device->task_add(task); | device->task_add(task); | ||||
| device->task_wait(); | device->task_wait(); | ||||
| if(progress.get_cancel()) { | if(progress.get_cancel()) { | ||||
| device->mem_free(d_input); | device->mem_free(d_input); | ||||
| device->mem_free(d_output); | device->mem_free(d_output); | ||||
| m_is_baking = false; | m_is_baking = false; | ||||
| return false; | return false; | ||||
| } | } | ||||
| device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); | device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); | ||||
| device->mem_free(d_input); | device->mem_free(d_input); | ||||
| device->mem_free(d_output); | device->mem_free(d_output); | ||||
| /* read result */ | /* read result */ | ||||
| int k = 0; | int k = 0; | ||||
| float4 *offset = (float4*)d_output.data_pointer; | float4 *offset = (float4*)d_output.data_pointer; | ||||
| size_t depth = 4; | size_t depth = 4; | ||||
| for(size_t i = 0; i < limit; i++) { | for(size_t i=shader_offset; i < (shader_offset + shader_size); i++) { | ||||
| size_t index = i * depth; | size_t index = i * depth; | ||||
| float4 out = offset[k++]; | float4 out = offset[k++]; | ||||
| if(bake_data->is_valid(i)) { | if(bake_data->is_valid(i)) { | ||||
| for(size_t j=0; j < 4; j++) { | for(size_t j=0; j < 4; j++) { | ||||
| result[index + j] = out[j]; | result[index + j] = out[j]; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | |||||
| m_is_baking = false; | m_is_baking = false; | ||||
| return true; | return true; | ||||
| } | } | ||||
| void BakeManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) | void BakeManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) | ||||
| { | { | ||||
| if(!need_update) | if(!need_update) | ||||
| ▲ Show 20 Lines • Show All 43 Lines • Show Last 20 Lines | |||||
This seems a bit arbitrary; where did the numbers come from?
Just throwing out some ideas: