intern/cycles/device/device_optix.cpp
[... 191 lines not shown ...]  # endif
  OptixDenoiser denoiser = NULL;
  device_only_memory<unsigned char> denoiser_state;
  int denoiser_input_passes = 0;
 public:
  OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
      : CUDADevice(info_, stats_, profiler_, background_),
        sbt_data(this, "__sbt", MEM_READ_ONLY),
-       launch_params(this, "__params"),
-       denoiser_state(this, "__denoiser_state")
+       launch_params(this, "__params", MEM_READ_ONLY),
+       denoiser_state(this, "__denoiser_state", MEM_READ_WRITE)
  {
    // Store number of CUDA streams in device info
    info.cpu_threads = DebugFlags().optix.cuda_streams;
    // Make the CUDA context current
    if (!cuContext) {
      return;  // Do not initialize if CUDA context creation failed already
    }
[... 663 lines not shown ...]  if (task.denoising.type == DENOISER_OPTIX) {
          (task.pass_denoising_data + DENOISING_PASS_ALBEDO) * (int)sizeof(float),
          (task.pass_denoising_data + DENOISING_PASS_NORMAL) * (int)sizeof(float)};
      // Start with the current tile pointer offset
      int input_stride = pixel_stride;
      device_ptr input_ptr = rtile.buffer + pixel_offset;
      // Copy tile data into a common buffer if necessary
-     device_only_memory<float> input(this, "denoiser input");
-     device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE);
+     device_only_memory<float> input(this, "denoiser input", MEM_READ_WRITE);
+     device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_ONLY);
      bool contiguous_memory = true;
      for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
        if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) {
          contiguous_memory = false;
        }
      }
[... 28 lines not shown ...]  if (task.denoising.type == DENOISER_OPTIX) {
        tile_info_mem.copy_to_device();
        void *args[] = {
            &input.device_pointer, &tile_info_mem.device_pointer, &rect.x, &task.pass_stride};
        launch_filter_kernel("kernel_cuda_filter_copy_input", rect_size.x, rect_size.y, args);
      }
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
-     device_only_memory<float> input_rgb(this, "denoiser input rgb");
+     device_only_memory<float> input_rgb(this, "denoiser input rgb", MEM_READ_WRITE);
      input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.input_passes);
      void *input_args[] = {&input_rgb.device_pointer,
                            &input_ptr,
                            &rect_size.x,
                            &rect_size.y,
                            &input_stride,
                            &task.pass_stride,
[... 229 lines not shown ...]  bool build_optix_bvh(BVHOptiX *bvh,
    options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH;
    options.motionOptions.timeBegin = 0.0f;
    options.motionOptions.timeEnd = 1.0f;
    check_result_optix_ret(
        optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
    // Allocate required output buffers
-   device_only_memory<char> temp_mem(this, "optix temp as build mem");
+   device_only_memory<char> temp_mem(this, "optix temp as build mem", MEM_READ_WRITE);
    temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
    if (!temp_mem.device_pointer)
      return false;  // Make sure temporary memory allocation succeeded
+   // Acceleration structure memory has to be allocated on the device (not allowed to be on host)
    device_only_memory<char> &out_data = bvh->as_data;
    if (operation == OPTIX_BUILD_OPERATION_BUILD) {
      assert(out_data.device == this);
      out_data.alloc_to_device(sizes.outputSizeInBytes);
      if (!out_data.device_pointer)
        return false;
    }
    else {
[... 51 lines not shown ...]  if (background) {
            &out_handle));
        bvh->traversable_handle = static_cast<uint64_t>(out_handle);
        // Wait for compaction to finish
        check_result_cuda_ret(cuStreamSynchronize(NULL));
        std::swap(out_data.device_size, compacted_data.device_size);
        std::swap(out_data.device_pointer, compacted_data.device_pointer);
+       // Original acceleration structure memory is freed when 'compacted_data' goes out of scope
      }
    }
    return true;
  }
  void build_bvh(BVH *bvh, Progress &progress, bool refit) override
  {
[... 621 lines not shown ...]
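
For reference, the pattern this changeset applies at every call site is to state a buffer's access type (MEM_READ_ONLY or MEM_READ_WRITE) explicitly when the device allocation is constructed, rather than relying on a default. Below is a minimal, self-contained sketch of that idea only; it is not the real Cycles device_memory API, and the class name, members, and the stubbed-out Device type are hypothetical stand-ins for illustration.

// Sketch only: a device buffer that records its access pattern at construction
// time, mirroring the explicit MemoryType arguments added in this changeset.
// Allocation is stubbed with malloc/free; real device allocation is omitted.
#include <cstdio>
#include <cstdlib>
#include <string>

enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE };

class Device;  // stand-in for the real device class, only used as a pointer here

template<typename T> class device_only_memory_sketch {
 public:
  device_only_memory_sketch(Device *device, const char *name, MemoryType type)
      : device_(device), name_(name), type_(type), data_(nullptr), size_(0)
  {
  }

  ~device_only_memory_sketch()
  {
    free(data_);  // freed automatically when the buffer goes out of scope
  }

  void alloc_to_device(size_t num_elements)
  {
    size_ = num_elements;
    data_ = static_cast<T *>(malloc(sizeof(T) * num_elements));
    printf("alloc %zu elements of %zu bytes for '%s' (%s)\n",
           num_elements,
           sizeof(T),
           name_.c_str(),
           type_ == MEM_READ_ONLY ? "read-only" : "read-write");
  }

 private:
  Device *device_;
  std::string name_;
  MemoryType type_;
  T *data_;
  size_t size_;
};

int main()
{
  Device *device = nullptr;  // no real device needed for the sketch
  // Usage mirrors the patched call sites, e.g.
  //   device_only_memory<float> input(this, "denoiser input", MEM_READ_WRITE);
  device_only_memory_sketch<float> input(device, "denoiser input", MEM_READ_WRITE);
  input.alloc_to_device(1024);
  return 0;
}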