Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/device/device_multi.cpp
| Show First 20 Lines • Show All 256 Lines • ▼ Show 20 Lines | assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX || | ||||
| bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE); | bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE); | ||||
| BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh); | BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh); | ||||
| bvh_multi->sub_bvhs.resize(devices.size()); | bvh_multi->sub_bvhs.resize(devices.size()); | ||||
| vector<BVHMulti *> geom_bvhs; | vector<BVHMulti *> geom_bvhs; | ||||
| geom_bvhs.reserve(bvh->geometry.size()); | geom_bvhs.reserve(bvh->geometry.size()); | ||||
| foreach (Geometry *geom, bvh->geometry) { | foreach (Geometry *geom, bvh->geometry) { | ||||
| geom_bvhs.push_back(static_cast<BVHMulti *>(geom->bvh)); | geom_bvhs.push_back(static_cast<BVHMulti *>(geom->get_bvh())); | ||||
| } | } | ||||
| /* Broadcast acceleration structure build to all render devices */ | /* Broadcast acceleration structure build to all render devices */ | ||||
| size_t i = 0; | size_t i = 0; | ||||
| foreach (SubDevice &sub, devices) { | foreach (SubDevice &sub, devices) { | ||||
| /* Change geometry BVH pointers to the sub BVH */ | /* Change geometry BVH pointers to the sub BVH */ | ||||
| for (size_t k = 0; k < bvh->geometry.size(); ++k) { | for (size_t k = 0; k < bvh->geometry.size(); ++k) { | ||||
| bvh->geometry[k]->bvh = geom_bvhs[k]->sub_bvhs[i]; | bvh->geometry[k]->set_bvh(geom_bvhs[k]->sub_bvhs[i]); | ||||
| } | } | ||||
| if (!bvh_multi->sub_bvhs[i]) { | if (!bvh_multi->sub_bvhs[i]) { | ||||
| BVHParams params = bvh->params; | BVHParams params = bvh->params; | ||||
| if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX) | if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX) | ||||
| params.bvh_layout = BVH_LAYOUT_OPTIX; | params.bvh_layout = BVH_LAYOUT_OPTIX; | ||||
| else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) | else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) | ||||
| params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX : | params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX : | ||||
| Show All 11 Lines | bvh->geometry[k]->set_bvh(geom_bvhs[k]->sub_bvhs[i]); | ||||
| } | } | ||||
| sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit); | sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit); | ||||
| i++; | i++; | ||||
| } | } | ||||
| /* Change geometry BVH pointers back to the multi BVH */ | /* Change geometry BVH pointers back to the multi BVH */ | ||||
| for (size_t k = 0; k < bvh->geometry.size(); ++k) { | for (size_t k = 0; k < bvh->geometry.size(); ++k) { | ||||
| bvh->geometry[k]->bvh = geom_bvhs[k]; | bvh->geometry[k]->set_bvh(geom_bvhs[k]); | ||||
| } | } | ||||
| } | } | ||||
| virtual void *osl_memory() override | virtual void *osl_memory() override | ||||
| { | { | ||||
| if (devices.size() > 1) { | if (devices.size() > 1) { | ||||
| return NULL; | return NULL; | ||||
| } | } | ||||
| Show All 30 Lines | #endif | ||||
| SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island) | SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island) | ||||
| { | { | ||||
| assert(!island.empty()); | assert(!island.empty()); | ||||
| /* Get the memory owner of this key or the device with the lowest memory usage when new */ | /* Get the memory owner of this key or the device with the lowest memory usage when new */ | ||||
| SubDevice *owner_sub = island.front(); | SubDevice *owner_sub = island.front(); | ||||
| foreach (SubDevice *island_sub, island) { | foreach (SubDevice *island_sub, island) { | ||||
| if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) : | if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) : | ||||
| (island_sub->device->stats.mem_used < owner_sub->device->stats.mem_used)) { | (island_sub->device->stats.get_mem_used() < | ||||
| owner_sub->device->stats.get_mem_used())) { | |||||
| owner_sub = island_sub; | owner_sub = island_sub; | ||||
| } | } | ||||
| } | } | ||||
| return owner_sub; | return owner_sub; | ||||
| } | } | ||||
| inline device_ptr find_matching_mem(device_ptr key, SubDevice &sub) | inline device_ptr find_matching_mem(device_ptr key, SubDevice &sub) | ||||
| { | { | ||||
| ▲ Show 20 Lines • Show All 249 Lines • ▼ Show 20 Lines | foreach (SubDevice &sub, devices) { | ||||
| i++; | i++; | ||||
| } | } | ||||
| rgba.device_pointer = key; | rgba.device_pointer = key; | ||||
| } | } | ||||
| void map_tile(Device *sub_device, RenderTile &tile) override | void map_tile(Device *sub_device, RenderTile &tile) override | ||||
| { | { | ||||
| if (!tile.buffer) { | if (!tile.get_buffer()) { | ||||
| return; | return; | ||||
| } | } | ||||
| foreach (SubDevice &sub, devices) { | foreach (SubDevice &sub, devices) { | ||||
| if (sub.device == sub_device) { | if (sub.device == sub_device) { | ||||
| tile.buffer = find_matching_mem(tile.buffer, sub); | tile.get_buffer() = find_matching_mem(tile.get_buffer(), sub); | ||||
| return; | return; | ||||
| } | } | ||||
| } | } | ||||
| foreach (SubDevice &sub, denoising_devices) { | foreach (SubDevice &sub, denoising_devices) { | ||||
| if (sub.device == sub_device) { | if (sub.device == sub_device) { | ||||
| tile.buffer = sub.ptr_map[tile.buffer]; | tile.get_buffer() = sub.ptr_map[tile.get_buffer()]; | ||||
| return; | return; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| int device_number(Device *sub_device) override | int device_number(Device *sub_device) override | ||||
| { | { | ||||
| int i = 0; | int i = 0; | ||||
| Show All 10 Lines | foreach (SubDevice &sub, denoising_devices) { | ||||
| i++; | i++; | ||||
| } | } | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override | void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override | ||||
| { | { | ||||
| for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { | foreach (RenderTile &tile, neighbors.get_tiles()) { | ||||
| RenderTile &tile = neighbors.tiles[i]; | if (!tile.get_buffers()) { | ||||
| if (!tile.buffers) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| device_vector<float> &mem = tile.buffers->buffer; | device_vector<float> &mem = tile.get_buffers()->get_buffer(); | ||||
| tile.buffer = mem.device_pointer; | tile.get_buffer() = mem.device_pointer; | ||||
| if (mem.device == this && matching_rendering_and_denoising_devices) { | if (mem.device == this && matching_rendering_and_denoising_devices) { | ||||
| /* Skip unnecessary copies in viewport mode (buffer covers the | /* Skip unnecessary copies in viewport mode (buffer covers the | ||||
| * whole image), but still need to fix up the tile device pointer. */ | * whole image), but still need to fix up the tile device pointer. */ | ||||
| map_tile(sub_device, tile); | map_tile(sub_device, tile); | ||||
| continue; | continue; | ||||
| } | } | ||||
| /* If the tile was rendered on another device, copy its memory to | /* If the tile was rendered on another device, copy its memory to | ||||
| * the current device now, for the duration of the denoising task. | * the current device now, for the duration of the denoising task. | ||||
| * Note that this temporarily modifies the RenderBuffers and calls | * Note that this temporarily modifies the RenderBuffers and calls | ||||
| * the device, so this function is not thread safe. */ | * the device, so this function is not thread safe. */ | ||||
| if (mem.device != sub_device) { | if (mem.device != sub_device) { | ||||
| /* Only copy from device to host once. This is faster, but | /* Only copy from device to host once. This is faster, but | ||||
| * also required for the case where a CPU thread is denoising | * also required for the case where a CPU thread is denoising | ||||
| * a tile rendered on the GPU. In that case we have to avoid | * a tile rendered on the GPU. In that case we have to avoid | ||||
| * overwriting the buffer being de-noised by the CPU thread. */ | * overwriting the buffer being de-noised by the CPU thread. */ | ||||
| if (!tile.buffers->map_neighbor_copied) { | if (!tile.get_buffers()->get_map_neighbor_copied()) { | ||||
| tile.buffers->map_neighbor_copied = true; | tile.get_buffers()->set_map_neighbor_copied(true); | ||||
| mem.copy_from_device(); | mem.copy_from_device(); | ||||
| } | } | ||||
| if (mem.device == this) { | if (mem.device == this) { | ||||
| /* Can re-use memory if tile is already allocated on the sub device. */ | /* Can re-use memory if tile is already allocated on the sub device. */ | ||||
| map_tile(sub_device, tile); | map_tile(sub_device, tile); | ||||
| mem.swap_device(sub_device, mem.device_size, tile.buffer); | mem.swap_device(sub_device, mem.device_size, tile.get_buffer()); | ||||
| } | } | ||||
| else { | else { | ||||
| mem.swap_device(sub_device, 0, 0); | mem.swap_device(sub_device, 0, 0); | ||||
| } | } | ||||
| mem.copy_to_device(); | mem.copy_to_device(); | ||||
| tile.buffer = mem.device_pointer; | tile.get_buffer() = mem.device_pointer; | ||||
| tile.device_size = mem.device_size; | tile.get_device_size() = mem.device_size; | ||||
| mem.restore_device(); | mem.restore_device(); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override | void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override | ||||
| { | { | ||||
| RenderTile &target_tile = neighbors.target; | RenderTile &target_tile = neighbors.get_target(); | ||||
| device_vector<float> &mem = target_tile.buffers->buffer; | device_vector<float> &mem = target_tile.get_buffers()->get_buffer(); | ||||
| if (mem.device == this && matching_rendering_and_denoising_devices) { | if (mem.device == this && matching_rendering_and_denoising_devices) { | ||||
| return; | return; | ||||
| } | } | ||||
| /* Copy denoised result back to the host. */ | /* Copy denoised result back to the host. */ | ||||
| mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer); | mem.swap_device(sub_device, target_tile.get_device_size(), target_tile.get_buffer()); | ||||
| mem.copy_from_device(); | mem.copy_from_device(); | ||||
| mem.restore_device(); | mem.restore_device(); | ||||
| /* Copy denoised result to the original device. */ | /* Copy denoised result to the original device. */ | ||||
| mem.copy_to_device(); | mem.copy_to_device(); | ||||
| for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { | foreach (RenderTile &tile, neighbors.get_tiles()) { | ||||
| RenderTile &tile = neighbors.tiles[i]; | if (!tile.get_buffers()) { | ||||
| if (!tile.buffers) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| device_vector<float> &mem = tile.buffers->buffer; | device_vector<float> &mem = tile.get_buffers()->get_buffer(); | ||||
| if (mem.device != sub_device && mem.device != this) { | if (mem.device != sub_device && mem.device != this) { | ||||
| /* Free up memory again if it was allocated for the copy above. */ | /* Free up memory again if it was allocated for the copy above. */ | ||||
| mem.swap_device(sub_device, tile.device_size, tile.buffer); | mem.swap_device(sub_device, tile.get_device_size(), tile.get_buffer()); | ||||
| sub_device->mem_free(mem); | sub_device->mem_free(mem); | ||||
| mem.restore_device(); | mem.restore_device(); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| int get_split_task_count(DeviceTask &task) override | int get_split_task_count(DeviceTask &task) override | ||||
| { | { | ||||
| ▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | foreach (SubDevice &sub, task_devices) { | ||||
| subtask.rgba_half = sub.ptr_map[task.rgba_half]; | subtask.rgba_half = sub.ptr_map[task.rgba_half]; | ||||
| if (task.shader_input) | if (task.shader_input) | ||||
| subtask.shader_input = find_matching_mem(task.shader_input, sub); | subtask.shader_input = find_matching_mem(task.shader_input, sub); | ||||
| if (task.shader_output) | if (task.shader_output) | ||||
| subtask.shader_output = find_matching_mem(task.shader_output, sub); | subtask.shader_output = find_matching_mem(task.shader_output, sub); | ||||
| sub.device->task_add(subtask); | sub.device->task_add(subtask); | ||||
| if (task.buffers && task.buffers->buffer.device == this) { | if (task.buffers && task.buffers->get_buffer().device == this) { | ||||
| /* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */ | /* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */ | ||||
| sub.device->task_wait(); | sub.device->task_wait(); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| void task_wait() override | void task_wait() override | ||||
| Show All 22 Lines | |||||