Changeset View
Changeset View
Standalone View
Standalone View
source/blender/blenkernel/intern/image_gpu.c
| Show All 17 Lines | |||||
| */ | */ | ||||
| /** \file | /** \file | ||||
| * \ingroup bke | * \ingroup bke | ||||
| */ | */ | ||||
| #include "MEM_guardedalloc.h" | #include "MEM_guardedalloc.h" | ||||
| #include "BLI_bitmap.h" | |||||
| #include "BLI_boxpack_2d.h" | #include "BLI_boxpack_2d.h" | ||||
| #include "BLI_linklist.h" | #include "BLI_linklist.h" | ||||
| #include "BLI_listbase.h" | #include "BLI_listbase.h" | ||||
| #include "BLI_threads.h" | #include "BLI_threads.h" | ||||
| #include "DNA_image_types.h" | #include "DNA_image_types.h" | ||||
| #include "DNA_userdef_types.h" | #include "DNA_userdef_types.h" | ||||
| Show All 9 Lines | |||||
| #include "GPU_state.h" | #include "GPU_state.h" | ||||
| #include "GPU_texture.h" | #include "GPU_texture.h" | ||||
| #include "PIL_time.h" | #include "PIL_time.h" | ||||
| /* Prototypes. */ | /* Prototypes. */ | ||||
| static void gpu_free_unused_buffers(void); | static void gpu_free_unused_buffers(void); | ||||
| static void image_free_gpu(Image *ima, const bool immediate); | static void image_free_gpu(Image *ima, const bool immediate); | ||||
| static void image_update_gputexture_ex( | |||||
| Image *ima, ImageTile *tile, ImBuf *ibuf, int x, int y, int w, int h); | |||||
/* Internal structs. */

/* Edge length of one partial-refresh tile. The tile coordinates stored in
 * #ImagePartialRefresh are multiplied by this value to obtain offsets into the image buffer. */
#define IMA_PARTIAL_REFRESH_TILE_SIZE 256

/* A single tile of an image that is waiting for a partial GPU texture refresh.
 * Instances are stored in the `gpu_refresh_areas` list of the image. */
typedef struct ImagePartialRefresh {
  /* Links to the neighboring entries of the list this area is stored in. */
  struct ImagePartialRefresh *next, *prev;
  /* Tile column, in units of #IMA_PARTIAL_REFRESH_TILE_SIZE. */
  int tile_x;
  /* Tile row, in units of #IMA_PARTIAL_REFRESH_TILE_SIZE. */
  int tile_y;
} ImagePartialRefresh;
| /* Is the alpha of the `GPUTexture` for a given image/ibuf premultiplied. */ | /* Is the alpha of the `GPUTexture` for a given image/ibuf premultiplied. */ | ||||
| bool BKE_image_has_gpu_texture_premultiplied_alpha(Image *image, ImBuf *ibuf) | bool BKE_image_has_gpu_texture_premultiplied_alpha(Image *image, ImBuf *ibuf) | ||||
| { | { | ||||
| if (image) { | if (image) { | ||||
| /* Render result and compositor output are always premultiplied */ | /* Render result and compositor output are always premultiplied */ | ||||
| if (ELEM(image->type, IMA_TYPE_R_RESULT, IMA_TYPE_COMPOSITE)) { | if (ELEM(image->type, IMA_TYPE_R_RESULT, IMA_TYPE_COMPOSITE)) { | ||||
| return true; | return true; | ||||
| Show All 9 Lines | else if (ibuf) { | ||||
| } | } | ||||
| else { | else { | ||||
| return image ? (image->alpha_mode == IMA_ALPHA_PREMUL) : true; | return image ? (image->alpha_mode == IMA_ALPHA_PREMUL) : true; | ||||
| } | } | ||||
| } | } | ||||
| return false; | return false; | ||||
| } | } | ||||
| /* -------------------------------------------------------------------- */ | /* -------------------------------------------------------------------- */ | ||||
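fclem: Nitpick: Maybe `size_match()` is a better function name (inverted, obviously). But to be fair, having a function that is used only once and is a one-liner seems a bit silly and makes the code a bit harder to read.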
| /** \name UDIM gpu texture | /** \name UDIM gpu texture | ||||
| * \{ */ | * \{ */ | ||||
/* Report whether `w` or `h` is larger than the value #GPU_texture_size_with_limit
 * returns for that same dimension. */
static bool is_over_resolution_limit(int w, int h)
{
  if (w > GPU_texture_size_with_limit(w)) {
    return true;
  }
  /* The height is only queried when the width is within its limit. */
  return h > GPU_texture_size_with_limit(h);
}
| ▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines | static GPUTexture *image_get_gpu_texture(Image *ima, | ||||
| * context and might as well ensure we have as much space free as possible. */ | * context and might as well ensure we have as much space free as possible. */ | ||||
| gpu_free_unused_buffers(); | gpu_free_unused_buffers(); | ||||
| /* Free GPU textures when requesting a different render pass/layer. | /* Free GPU textures when requesting a different render pass/layer. | ||||
| * When `iuser` isn't set (texture painting single image mode) we assume that | * When `iuser` isn't set (texture painting single image mode) we assume that | ||||
| * the current `pass` and `layer` should be 0. */ | * the current `pass` and `layer` should be 0. */ | ||||
| short requested_pass = iuser ? iuser->pass : 0; | short requested_pass = iuser ? iuser->pass : 0; | ||||
| short requested_layer = iuser ? iuser->layer : 0; | short requested_layer = iuser ? iuser->layer : 0; | ||||
| short requested_slot = ima->render_slot; | if (ima->gpu_pass != requested_pass || ima->gpu_layer != requested_layer) { | ||||
| if (ima->gpu_pass != requested_pass || ima->gpu_layer != requested_layer || | |||||
| ima->gpu_slot != requested_slot) { | |||||
| ima->gpu_pass = requested_pass; | ima->gpu_pass = requested_pass; | ||||
| ima->gpu_layer = requested_layer; | ima->gpu_layer = requested_layer; | ||||
| ima->gpu_slot = requested_slot; | |||||
| ima->gpuflag |= IMA_GPU_REFRESH; | ima->gpuflag |= IMA_GPU_REFRESH; | ||||
| } | } | ||||
| /* currently, gpu refresh tagging is used by ima sequences */ | /* Check if image has been updated and tagged to be updated (full or partial). */ | ||||
| if (ima->gpuflag & IMA_GPU_REFRESH) { | ImageTile *tile = BKE_image_get_tile(ima, 0); | ||||
| if (((ima->gpuflag & IMA_GPU_REFRESH) != 0) || | |||||
| ((ibuf == NULL || tile == NULL || !tile->ok) && | |||||
| ((ima->gpuflag & IMA_GPU_PARTIAL_REFRESH) != 0))) { | |||||
| image_free_gpu(ima, true); | image_free_gpu(ima, true); | ||||
| ima->gpuflag &= ~IMA_GPU_REFRESH; | BLI_freelistN(&ima->gpu_refresh_areas); | ||||
| ima->gpuflag &= ~(IMA_GPU_REFRESH | IMA_GPU_PARTIAL_REFRESH); | |||||
| } | |||||
| else if (ima->gpuflag & IMA_GPU_PARTIAL_REFRESH) { | |||||
Inline comment (done) — brecht: If `ibuf` is `NULL` and partial refresh is requested, I think it should do a full free of the image. I'm not sure if that happens in practice, but it seems safer.
| BLI_assert(ibuf); | |||||
| BLI_assert(tile && tile->ok); | |||||
| ImagePartialRefresh *refresh_area; | |||||
| while ((refresh_area = BLI_pophead(&ima->gpu_refresh_areas))) { | |||||
Inline comment (done) — brecht: `tile` is assumed to be non-NULL and ok here; it's not clear that this is safe.
| const int tile_offset_x = refresh_area->tile_x * IMA_PARTIAL_REFRESH_TILE_SIZE; | |||||
| const int tile_offset_y = refresh_area->tile_y * IMA_PARTIAL_REFRESH_TILE_SIZE; | |||||
| const int tile_width = MIN2(IMA_PARTIAL_REFRESH_TILE_SIZE, ibuf->x - tile_offset_x); | |||||
| const int tile_height = MIN2(IMA_PARTIAL_REFRESH_TILE_SIZE, ibuf->y - tile_offset_y); | |||||
| image_update_gputexture_ex( | |||||
| ima, tile, ibuf, tile_offset_x, tile_offset_y, tile_width, tile_height); | |||||
| MEM_freeN(refresh_area); | |||||
| } | |||||
| ima->gpuflag &= ~IMA_GPU_PARTIAL_REFRESH; | |||||
| } | } | ||||
| /* Tag as in active use for garbage collector. */ | /* Tag as in active use for garbage collector. */ | ||||
Inline comment (done) — brecht: `BKE_image_tag_time` is for garbage collection, not image sequences. So I'm not sure what this comment is about.
| BKE_image_tag_time(ima); | BKE_image_tag_time(ima); | ||||
| /* Test if we already have a texture. */ | /* Test if we already have a texture. */ | ||||
| const int current_view = iuser ? ((iuser->flag & IMA_SHOW_STEREO) != 0 ? iuser->multiview_eye : | const int current_view = iuser ? ((iuser->flag & IMA_SHOW_STEREO) != 0 ? iuser->multiview_eye : | ||||
| iuser->view) : | iuser->view) : | ||||
| 0; | 0; | ||||
| GPUTexture **tex = get_image_gpu_texture_ptr(ima, textarget, current_view); | GPUTexture **tex = get_image_gpu_texture_ptr(ima, textarget, current_view); | ||||
| if (*tex) { | if (*tex) { | ||||
| return *tex; | return *tex; | ||||
| } | } | ||||
| /* Check if we have a valid image. If not, we return a dummy | /* Check if we have a valid image. If not, we return a dummy | ||||
| * texture with zero bind-code so we don't keep trying. */ | * texture with zero bind-code so we don't keep trying. */ | ||||
| ImageTile *tile = BKE_image_get_tile(ima, 0); | |||||
| if (tile == NULL || tile->ok == 0) { | if (tile == NULL || tile->ok == 0) { | ||||
| *tex = image_gpu_texture_error_create(textarget); | *tex = image_gpu_texture_error_create(textarget); | ||||
| return *tex; | return *tex; | ||||
| } | } | ||||
| /* check if we have a valid image buffer */ | /* check if we have a valid image buffer */ | ||||
| ImBuf *ibuf_intern = ibuf; | ImBuf *ibuf_intern = ibuf; | ||||
| if (ibuf_intern == NULL) { | if (ibuf_intern == NULL) { | ||||
| ▲ Show 20 Lines • Show All 245 Lines • ▼ Show 20 Lines | ibuf = update_do_scale( | ||||
| rect, rect_float, &x, &y, &w, &h, tile_size[0], tile_size[1], full_w, full_h); | rect, rect_float, &x, &y, &w, &h, tile_size[0], tile_size[1], full_w, full_h); | ||||
| /* Shift to account for tile packing. */ | /* Shift to account for tile packing. */ | ||||
| x += tile_offset[0]; | x += tile_offset[0]; | ||||
| y += tile_offset[1]; | y += tile_offset[1]; | ||||
| } | } | ||||
| else { | else { | ||||
| /* Partial update with scaling. */ | /* Partial update with scaling. */ | ||||
| int limit_w = smaller_power_of_2_limit(full_w); | int limit_w = GPU_texture_width(tex); | ||||
| int limit_h = smaller_power_of_2_limit(full_h); | int limit_h = GPU_texture_height(tex); | ||||
| ibuf = update_do_scale(rect, rect_float, &x, &y, &w, &h, limit_w, limit_h, full_w, full_h); | ibuf = update_do_scale(rect, rect_float, &x, &y, &w, &h, limit_w, limit_h, full_w, full_h); | ||||
| } | } | ||||
| void *data = (ibuf->rect_float) ? (void *)(ibuf->rect_float) : (void *)(ibuf->rect); | void *data = (ibuf->rect_float) ? (void *)(ibuf->rect_float) : (void *)(ibuf->rect); | ||||
| eGPUDataFormat data_format = (ibuf->rect_float) ? GPU_DATA_FLOAT : GPU_DATA_UNSIGNED_BYTE; | eGPUDataFormat data_format = (ibuf->rect_float) ? GPU_DATA_FLOAT : GPU_DATA_UNSIGNED_BYTE; | ||||
| GPU_texture_update_sub(tex, data_format, data, x, y, layer, w, h, 1); | GPU_texture_update_sub(tex, data_format, data, x, y, layer, w, h, 1); | ||||
| Show All 35 Lines | static void gpu_texture_update_from_ibuf( | ||||
| GPUTexture *tex, Image *ima, ImBuf *ibuf, ImageTile *tile, int x, int y, int w, int h) | GPUTexture *tex, Image *ima, ImBuf *ibuf, ImageTile *tile, int x, int y, int w, int h) | ||||
| { | { | ||||
| bool scaled; | bool scaled; | ||||
| if (tile != NULL) { | if (tile != NULL) { | ||||
| int *tilesize = tile->runtime.tilearray_size; | int *tilesize = tile->runtime.tilearray_size; | ||||
| scaled = (ibuf->x != tilesize[0]) || (ibuf->y != tilesize[1]); | scaled = (ibuf->x != tilesize[0]) || (ibuf->y != tilesize[1]); | ||||
| } | } | ||||
| else { | else { | ||||
| scaled = is_over_resolution_limit(ibuf->x, ibuf->y); | scaled = (GPU_texture_width(tex) != ibuf->x) || (GPU_texture_height(tex) != ibuf->y); | ||||
| } | } | ||||
| if (scaled) { | if (scaled) { | ||||
| /* Extra padding to account for bleed from neighboring pixels. */ | /* Extra padding to account for bleed from neighboring pixels. */ | ||||
| const int padding = 4; | const int padding = 4; | ||||
| const int xmax = min_ii(x + w + padding, ibuf->x); | const int xmax = min_ii(x + w + padding, ibuf->x); | ||||
| const int ymax = min_ii(y + h + padding, ibuf->y); | const int ymax = min_ii(y + h + padding, ibuf->y); | ||||
| x = max_ii(x - padding, 0); | x = max_ii(x - padding, 0); | ||||
| ▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | static void gpu_texture_update_from_ibuf( | ||||
| } | } | ||||
| else { | else { | ||||
| ima->gpuflag &= ~IMA_GPU_MIPMAP_COMPLETE; | ima->gpuflag &= ~IMA_GPU_MIPMAP_COMPLETE; | ||||
| } | } | ||||
| GPU_texture_unbind(tex); | GPU_texture_unbind(tex); | ||||
| } | } | ||||
| static void image_update_gputexture_ex( | |||||
| Image *ima, ImageTile *tile, ImBuf *ibuf, int x, int y, int w, int h) | |||||
| { | |||||
| GPUTexture *tex = ima->gputexture[TEXTARGET_2D][0]; | |||||
| /* Check if we need to update the main gputexture. */ | |||||
| if (tex != NULL && tile == ima->tiles.first) { | |||||
| gpu_texture_update_from_ibuf(tex, ima, ibuf, NULL, x, y, w, h); | |||||
| } | |||||
| /* Check if we need to update the array gputexture. */ | |||||
| tex = ima->gputexture[TEXTARGET_2D_ARRAY][0]; | |||||
| if (tex != NULL) { | |||||
| gpu_texture_update_from_ibuf(tex, ima, ibuf, tile, x, y, w, h); | |||||
| } | |||||
| } | |||||
| /* Partial update of texture for texture painting. This is often much | /* Partial update of texture for texture painting. This is often much | ||||
| * quicker than fully updating the texture for high resolution images. */ | * quicker than fully updating the texture for high resolution images. */ | ||||
| void BKE_image_update_gputexture(Image *ima, ImageUser *iuser, int x, int y, int w, int h) | void BKE_image_update_gputexture(Image *ima, ImageUser *iuser, int x, int y, int w, int h) | ||||
| { | { | ||||
| ImBuf *ibuf = BKE_image_acquire_ibuf(ima, iuser, NULL); | ImBuf *ibuf = BKE_image_acquire_ibuf(ima, iuser, NULL); | ||||
| ImageTile *tile = BKE_image_get_tile_from_iuser(ima, iuser); | ImageTile *tile = BKE_image_get_tile_from_iuser(ima, iuser); | ||||
| if ((ibuf == NULL) || (w == 0) || (h == 0)) { | if ((ibuf == NULL) || (w == 0) || (h == 0)) { | ||||
| /* Full reload of texture. */ | /* Full reload of texture. */ | ||||
| BKE_image_free_gputextures(ima); | BKE_image_free_gputextures(ima); | ||||
| } | } | ||||
| image_update_gputexture_ex(ima, tile, ibuf, x, y, w, h); | |||||
| BKE_image_release_ibuf(ima, ibuf, NULL); | |||||
| } | |||||
| GPUTexture *tex = ima->gputexture[TEXTARGET_2D][0]; | void BKE_image_update_gputexture_delayed( | ||||
| /* Check if we need to update the main gputexture. */ | struct Image *ima, struct ImBuf *ibuf, int x, int y, int w, int h) | ||||
| if (tex != NULL && tile == ima->tiles.first) { | { | ||||
| gpu_texture_update_from_ibuf(tex, ima, ibuf, NULL, x, y, w, h); | /* Check for full refresh. */ | ||||
| if (ibuf && x == 0 && y == 0 && w == ibuf->x && h == ibuf->y) { | |||||
| ima->gpuflag |= IMA_GPU_REFRESH; | |||||
| } | |||||
| /* Check if we can promote partial refresh to a full refresh. */ | |||||
| if ((ima->gpuflag & (IMA_GPU_REFRESH | IMA_GPU_PARTIAL_REFRESH)) == | |||||
| (IMA_GPU_REFRESH | IMA_GPU_PARTIAL_REFRESH)) { | |||||
| ima->gpuflag &= ~IMA_GPU_PARTIAL_REFRESH; | |||||
| BLI_freelistN(&ima->gpu_refresh_areas); | |||||
| } | |||||
| /* Image is already marked for complete refresh. */ | |||||
| if (ima->gpuflag & IMA_GPU_REFRESH) { | |||||
| return; | |||||
| } | } | ||||
| /* Check if we need to update the array gputexture. */ | /* Schedule the tiles that covers the requested area. */ | ||||
| tex = ima->gputexture[TEXTARGET_2D_ARRAY][0]; | const int start_tile_x = x / IMA_PARTIAL_REFRESH_TILE_SIZE; | ||||
| if (tex != NULL) { | const int start_tile_y = y / IMA_PARTIAL_REFRESH_TILE_SIZE; | ||||
| gpu_texture_update_from_ibuf(tex, ima, ibuf, tile, x, y, w, h); | const int end_tile_x = (x + w) / IMA_PARTIAL_REFRESH_TILE_SIZE; | ||||
| const int end_tile_y = (y + h) / IMA_PARTIAL_REFRESH_TILE_SIZE; | |||||
| const int num_tiles_x = (end_tile_x + 1) - (start_tile_x); | |||||
| const int num_tiles_y = (end_tile_y + 1) - (start_tile_y); | |||||
| const int num_tiles = num_tiles_x * num_tiles_y; | |||||
| const int allocate_on_heap = BLI_BITMAP_SIZE(num_tiles) > 16; | |||||
| BLI_bitmap *requested_tiles = NULL; | |||||
| if (allocate_on_heap) { | |||||
| requested_tiles = BLI_BITMAP_NEW(num_tiles, __func__); | |||||
| } | |||||
Done Inline ActionsThis has O(n^2) time complexity, but I guess it's not that since n is only the number of threads in practice. If it was for all tiles in the image it would be a bigger problem, see rB8c113eb0c475: Render: Use GHash for storing render parts. brecht: This has O(n^2) time complexity, but I guess it's not that since n is only the number of… | |||||
| else { | |||||
| requested_tiles = BLI_BITMAP_NEW_ALLOCA(num_tiles); | |||||
| } | |||||
| int num_tiles_not_scheduled = num_tiles; | |||||
| LISTBASE_FOREACH (ImagePartialRefresh *, area, &ima->gpu_refresh_areas) { | |||||
| if (area->tile_x < start_tile_x || area->tile_x > end_tile_x || area->tile_y < start_tile_y || | |||||
| area->tile_y > end_tile_y) { | |||||
| continue; | |||||
| } | |||||
| int requested_tile_index = (area->tile_x - start_tile_x) + | |||||
| (area->tile_y - start_tile_y) * num_tiles_x; | |||||
| BLI_BITMAP_ENABLE(requested_tiles, requested_tile_index); | |||||
| num_tiles_not_scheduled--; | |||||
| if (num_tiles_not_scheduled == 0) { | |||||
| break; | |||||
| } | |||||
| } | } | ||||
| BKE_image_release_ibuf(ima, ibuf, NULL); | /* Schedule the tiles that aren't requested yet. */ | ||||
| if (num_tiles_not_scheduled) { | |||||
| int tile_index = 0; | |||||
| for (int tile_y = start_tile_y; tile_y <= end_tile_y; tile_y++) { | |||||
| for (int tile_x = start_tile_x; tile_x <= end_tile_x; tile_x++) { | |||||
| if (!BLI_BITMAP_TEST_BOOL(requested_tiles, tile_index)) { | |||||
| ImagePartialRefresh *area = MEM_mallocN(sizeof(ImagePartialRefresh), __func__); | |||||
| area->tile_x = tile_x; | |||||
| area->tile_y = tile_y; | |||||
| BLI_addtail(&ima->gpu_refresh_areas, area); | |||||
| } | |||||
| tile_index++; | |||||
| } | |||||
| } | |||||
| ima->gpuflag |= IMA_GPU_PARTIAL_REFRESH; | |||||
| } | |||||
| if (allocate_on_heap) { | |||||
| MEM_freeN(requested_tiles); | |||||
| } | |||||
| } | } | ||||
| /* these two functions are called on entering and exiting texture paint mode, | /* these two functions are called on entering and exiting texture paint mode, | ||||
| * temporary disabling/enabling mipmapping on all images for quick texture | * temporary disabling/enabling mipmapping on all images for quick texture | ||||
| * updates with glTexSubImage2D. images that didn't change don't have to be | * updates with glTexSubImage2D. images that didn't change don't have to be | ||||
| * re-uploaded to OpenGL */ | * re-uploaded to OpenGL */ | ||||
| void BKE_image_paint_set_mipmap(Main *bmain, bool mipmap) | void BKE_image_paint_set_mipmap(Main *bmain, bool mipmap) | ||||
| { | { | ||||
| Show All 25 Lines | |||||
Nitpick: Maybe `size_match()` is a better function name (inverted, obviously).
But to be fair, having a function that is used only once and is a one-liner seems a bit silly and makes the code a bit harder to read.