Changeset View
Standalone View
intern/cycles/render/bake.cpp
| Show First 20 Lines • Show All 115 Lines • ▼ Show 20 Lines | |||||
| BakeData *BakeManager::init(const int object, const int tri_offset, const int num_pixels) | BakeData *BakeManager::init(const int object, const int tri_offset, const int num_pixels) | ||||
| { | { | ||||
| m_bake_data = new BakeData(object, tri_offset, num_pixels); | m_bake_data = new BakeData(object, tri_offset, num_pixels); | ||||
| return m_bake_data; | return m_bake_data; | ||||
| } | } | ||||
| bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]) | bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]) | ||||
| { | { | ||||
| size_t limit = bake_data->size(); | size_t num_pixels = bake_data->size(); | ||||
| size_t shader_limit = 3072 * 3072; | |||||
sergey: A bit arbitrary it seems; where did the numbers come from?
Just throwing out ideas:
- Use tile… | |||||
dfelinto (Author) Unsubmitted Not Done Inline Actions: That was just to confirm that 3k * 3k would render while 4k * 4k would not. Taking the size from somewhere else is indeed the way to go. dfelinto: That was just to confirm that 3k * 3k would render while 4k * 4k would not. Taking the size… | |||||
| /* if CPU can use higher limit */ | |||||
| for(size_t shader_offset=0; shader_offset < num_pixels; shader_offset += shader_limit) { | |||||
sergey Unsubmitted Not Done Inline Actions: Add spaces around operators; the same applies to some cases below. Also, it reads nicely as it is now, but it seems you need to indent the whole loop body. sergey: Add spaces around operators; the same applies to some cases below.
Also, it reads nicely as it is now… | |||||
dfelinto (Author) Unsubmitted Not Done Inline Actions: It is indented, but Phabricator doesn't show those code changes. dfelinto: It is indented, but Phabricator doesn't show those code changes. | |||||
| size_t shader_size = fminf(num_pixels - shader_offset, shader_limit); | |||||
| /* setup input for device task */ | /* setup input for device task */ | ||||
| device_vector<uint4> d_input; | device_vector<uint4> d_input; | ||||
| uint4 *d_input_data = d_input.resize(limit * 2); | uint4 *d_input_data = d_input.resize(shader_size * 2); | ||||
| size_t d_input_size = 0; | size_t d_input_size = 0; | ||||
Not Done Inline Actions: Why not keep it where it used to be and avoid having a rather obscure loop? I don't really think you'll notice the non-linearity in the progress. sergey: Why not keep it where it used to be and avoid having a rather obscure loop? I don't really… | |||||
| for(size_t i = 0; i < limit; i++) { | for(size_t i=shader_offset; i < (shader_offset + shader_size); i++) { | ||||
| d_input_data[d_input_size++] = bake_data->data(i); | d_input_data[d_input_size++] = bake_data->data(i); | ||||
| d_input_data[d_input_size++] = bake_data->differentials(i); | d_input_data[d_input_size++] = bake_data->differentials(i); | ||||
| } | } | ||||
| if(d_input_size == 0) | if(d_input_size == 0) { | ||||
| m_is_baking = false; | |||||
| return false; | return false; | ||||
| } | |||||
| /* run device task */ | /* run device task */ | ||||
| device_vector<float4> d_output; | device_vector<float4> d_output; | ||||
| d_output.resize(limit); | d_output.resize(shader_size); | ||||
| /* needs to be up to data for attribute access */ | /* needs to be up to data for attribute access */ | ||||
| device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); | device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); | ||||
| device->mem_alloc(d_input, MEM_READ_ONLY); | device->mem_alloc(d_input, MEM_READ_ONLY); | ||||
| device->mem_copy_to(d_input); | device->mem_copy_to(d_input); | ||||
| device->mem_alloc(d_output, MEM_WRITE_ONLY); | device->mem_alloc(d_output, MEM_WRITE_ONLY); | ||||
| DeviceTask task(DeviceTask::SHADER); | DeviceTask task(DeviceTask::SHADER); | ||||
| task.shader_input = d_input.device_pointer; | task.shader_input = d_input.device_pointer; | ||||
| task.shader_output = d_output.device_pointer; | task.shader_output = d_output.device_pointer; | ||||
| task.shader_eval_type = shader_type; | task.shader_eval_type = shader_type; | ||||
| task.shader_x = 0; | task.shader_x = shader_offset; | ||||
| task.shader_w = d_output.size(); | task.shader_w = d_output.size(); | ||||
| task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1; | task.num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples: 1; | ||||
| task.get_cancel = function_bind(&Progress::get_cancel, &progress); | task.get_cancel = function_bind(&Progress::get_cancel, &progress); | ||||
| task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); | task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); | ||||
| this->num_parts = device->get_split_task_count(task); | this->num_parts = device->get_split_task_count(task); | ||||
| this->num_samples = task.num_samples; | this->num_samples = task.num_samples; | ||||
| progress.reset_sample(); | |||||
sergey Unsubmitted Not Done Inline Actions: Does this mean the progress bar will go from 0 to 1 for every "tile"? sergey: Does this mean the progress bar will go from 0 to 1 for every "tile"? | |||||
dfelinto (Author) Unsubmitted Not Done Inline Actions: Yes it does, and it's the main issue to be handled before this patch can be considered for real. One thing that could work is to get the total count (all shader parts times their respective num_tasks) before starting the loop. dfelinto: Yes it does, and it's the main issue to be handled before this patch can be considered for… | |||||
sergey Unsubmitted Not Done Inline Actions: Would it work if we implemented splitting in the add_task for shader jobs in the CUDA device? sergey: Would it work if we implemented splitting in the add_task for shader jobs in the CUDA device? | |||||
dfelinto (Author) Unsubmitted Not Done Inline Actions: That would be the ideal solution. This patch was actually meant to illustrate that. However, since I have no CUDA at hand (nor am I very knowledgeable in CUDA coding), I took this proof-of-concept approach. dfelinto: That would be the ideal solution. This patch was actually meant to illustrate that. However, since I… | |||||
| device->task_add(task); | device->task_add(task); | ||||
| device->task_wait(); | device->task_wait(); | ||||
| if(progress.get_cancel()) { | if(progress.get_cancel()) { | ||||
| device->mem_free(d_input); | device->mem_free(d_input); | ||||
| device->mem_free(d_output); | device->mem_free(d_output); | ||||
| m_is_baking = false; | m_is_baking = false; | ||||
| return false; | return false; | ||||
| } | } | ||||
| device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); | device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); | ||||
| device->mem_free(d_input); | device->mem_free(d_input); | ||||
| device->mem_free(d_output); | device->mem_free(d_output); | ||||
| /* read result */ | /* read result */ | ||||
| int k = 0; | int k = 0; | ||||
| float4 *offset = (float4*)d_output.data_pointer; | float4 *offset = (float4*)d_output.data_pointer; | ||||
| size_t depth = 4; | size_t depth = 4; | ||||
| for(size_t i = 0; i < limit; i++) { | for(size_t i=shader_offset; i < (shader_offset + shader_size); i++) { | ||||
| size_t index = i * depth; | size_t index = i * depth; | ||||
| float4 out = offset[k++]; | float4 out = offset[k++]; | ||||
| if(bake_data->is_valid(i)) { | if(bake_data->is_valid(i)) { | ||||
| for(size_t j=0; j < 4; j++) { | for(size_t j=0; j < 4; j++) { | ||||
| result[index + j] = out[j]; | result[index + j] = out[j]; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | |||||
| m_is_baking = false; | m_is_baking = false; | ||||
| return true; | return true; | ||||
| } | } | ||||
| void BakeManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) | void BakeManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) | ||||
| { | { | ||||
| if(!need_update) | if(!need_update) | ||||
| ▲ Show 20 Lines • Show All 43 Lines • Show Last 20 Lines | |||||
A bit arbitrary it seems; where did the numbers come from?
Just throwing out ideas: