Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/kernel_volume.h
| Show First 20 Lines • Show All 621 Lines • ▼ Show 20 Lines | ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state, | ||||
| int max_steps; | int max_steps; | ||||
| float step_size, random_jitter_offset; | float step_size, random_jitter_offset; | ||||
| if(heterogeneous) { | if(heterogeneous) { | ||||
| const int global_max_steps = kernel_data.integrator.volume_max_steps; | const int global_max_steps = kernel_data.integrator.volume_max_steps; | ||||
| step_size = kernel_data.integrator.volume_step_size; | step_size = kernel_data.integrator.volume_step_size; | ||||
| /* compute exact steps in advance for malloc */ | /* compute exact steps in advance for malloc */ | ||||
| max_steps = max((int)ceilf(ray->t/step_size), 1); | max_steps = max((int)ceilf(ray->t/step_size), 1); | ||||
| /* NOTE: For the branched path tracing it's possible to have direct | |||||
| * and indirect light integration both having volume segments allocated. | |||||
| * We detect this using index in the pre-allocated memory. Currently we | |||||
| * only support two segments allocated at a time, if more needed some | |||||
| * modifications to the KernelGlobals will be needed. | |||||
| * | |||||
| * This gives us restrictions that decoupled record should only happen | |||||
| * in the stack manner, meaning if there's subsequent call of decoupled | |||||
| * record it'll need to free memory before its caller frees memory. | |||||
| */ | |||||
| const int index = kg->decoupled_volume_steps_index; | |||||
| assert(index < sizeof(kg->decoupled_volume_steps) / | |||||
| sizeof(*kg->decoupled_volume_steps)); | |||||
| if(max_steps > global_max_steps) { | if(max_steps > global_max_steps) { | ||||
| max_steps = global_max_steps; | max_steps = global_max_steps; | ||||
| step_size = ray->t / (float)max_steps; | step_size = ray->t / (float)max_steps; | ||||
| } | } | ||||
| segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); | if(kg->decoupled_volume_steps[index] == NULL) { | ||||
| kg->decoupled_volume_steps[index] = | |||||
| (VolumeStep*)malloc(sizeof(VolumeStep)*global_max_steps); | |||||
| } | |||||
| segment->steps = kg->decoupled_volume_steps[index]; | |||||
| random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size; | random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size; | ||||
| ++kg->decoupled_volume_steps_index; | |||||
brecht: In this case `global_max_steps` is fixed, so that should be ok.
For branched path tracing… | |||||
Not Done Inline ActionsWill do. sergey: Will do. | |||||
Not Done Inline ActionsActually I think it will be a problem with branched path tracing. In kernel_path_indirect it deallocates the array before doing the volume bounce, but you still use the array from the first and second bounces at the same time. It's just limited to 2 at the same time max. brecht: Actually I think it will be a problem with branched path tracing. In `kernel_path_indirect` it… | |||||
Not Done Inline ActionsWill need to have a closer look (hopefully tomorrow), not entirely sure what exact first and second bounce you're referring to. But if it's just two bounces to keep at a time we can store two pointers and a bitmask of some sort to see which arrays are free. Should still be cheaper than doing alloc/free for each of the integrations. Gimme some time to go over the corners in detail tho.. sergey: Will need to have a closer look (hopefully tomorrow, not entirely sure what's exact first and… | |||||
| } | } | ||||
| else { | else { | ||||
| max_steps = 1; | max_steps = 1; | ||||
| step_size = ray->t; | step_size = ray->t; | ||||
| random_jitter_offset = 0.0f; | random_jitter_offset = 0.0f; | ||||
| segment->steps = &segment->stack_step; | segment->steps = &segment->stack_step; | ||||
| } | } | ||||
| ▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | if(!is_zero(last_step->cdf_distance)) { | ||||
| for(int i = 0; i < numsteps; i++, step++) | for(int i = 0; i < numsteps; i++, step++) | ||||
| step->cdf_distance *= inv_cdf_distance_sum; | step->cdf_distance *= inv_cdf_distance_sum; | ||||
| } | } | ||||
| } | } | ||||
| ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment) | ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment) | ||||
| { | { | ||||
| if(segment->steps != &segment->stack_step) | if(segment->steps != &segment->stack_step) { | ||||
| free(segment->steps); | /* NOTE: We only allow freeing the last allocated segment. | ||||
| * No random order of alloc/free is supported. | |||||
| */ | |||||
| assert(kg->decoupled_volume_steps_index > 0); | |||||
| assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]); | |||||
| --kg->decoupled_volume_steps_index; | |||||
| } | |||||
| } | } | ||||
| /* scattering for homogeneous and heterogeneous volumes, using decoupled ray | /* scattering for homogeneous and heterogeneous volumes, using decoupled ray | ||||
| * marching. | * marching. | ||||
| * | * | ||||
| * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */ | * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */ | ||||
| ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( | ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( | ||||
| KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, | KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, | ||||
| ▲ Show 20 Lines • Show All 452 Lines • Show Last 20 Lines | |||||
In this case global_max_steps is fixed, so that should be ok.
For branched path tracing, this relies on the fact that kernel_path_indirect does not use decoupled shading, so we never need two such arrays in memory at once. That's fine, but might be good to add a comment about that.