Diffstat (limited to 'src/vulkan/gpu_tex.c')

-rw-r--r--   src/vulkan/gpu_tex.c   1453
1 file changed, 1453 insertions, 0 deletions
diff --git a/src/vulkan/gpu_tex.c b/src/vulkan/gpu_tex.c
new file mode 100644
index 0000000..7ab83b7
--- /dev/null
+++ b/src/vulkan/gpu_tex.c
@@ -0,0 +1,1453 @@

/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "gpu.h"

void vk_tex_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_tex tex,
                    VkPipelineStageFlags2 stage, VkAccessFlags2 access,
                    VkImageLayout layout, uint32_t qf)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    pl_rc_ref(&tex_vk->rc);
    pl_assert(!tex_vk->held);
    pl_assert(!tex_vk->num_planes);

    // CONCURRENT images require transitioning to/from IGNORED, EXCLUSIVE
    // images require transitioning to/from the concrete QF index
    if (vk->pools.num == 1) {
        if (tex_vk->qf == VK_QUEUE_FAMILY_IGNORED)
            tex_vk->qf = cmd->pool->qf;
        if (qf == VK_QUEUE_FAMILY_IGNORED)
            qf = cmd->pool->qf;
    }

    struct vk_sync_scope last;
    bool is_trans = layout != tex_vk->layout, is_xfer = qf != tex_vk->qf;
    last = vk_sem_barrier(cmd, &tex_vk->sem, stage, access, is_trans || is_xfer);

    VkImageMemoryBarrier2 barr = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
        .srcStageMask = last.stage,
        .srcAccessMask = last.access,
        .dstStageMask = stage,
        .dstAccessMask = access,
        .oldLayout = tex_vk->layout,
        .newLayout = layout,
        .srcQueueFamilyIndex = tex_vk->qf,
        .dstQueueFamilyIndex = qf,
        .image = tex_vk->img,
        .subresourceRange = {
            .aspectMask = tex_vk->aspect,
            .levelCount = 1,
            .layerCount = 1,
        },
    };

    if (tex_vk->may_invalidate) {
        tex_vk->may_invalidate = false;
        barr.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    }

    if (last.access || is_trans || is_xfer) {
        vk_cmd_barrier(cmd, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .imageMemoryBarrierCount = 1,
            .pImageMemoryBarriers = &barr,
        });
    }

    tex_vk->qf = qf;
    tex_vk->layout = layout;
    vk_cmd_callback(cmd, (vk_cb) vk_tex_deref, gpu, tex);

    for (int i = 0; i < tex_vk->ext_deps.num; i++)
        vk_cmd_dep(cmd, stage, tex_vk->ext_deps.elem[i]);
    tex_vk->ext_deps.num = 0;

    if (tex_vk->ext_sync) {
        vk_cmd_callback(cmd, (vk_cb) vk_sync_deref, gpu, tex_vk->ext_sync);
        tex_vk->ext_sync = NULL;
    }
}
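The barrier logic above maps directly onto VK_KHR_synchronization2 (core in Vulkan 1.3). As a point of reference, here is a minimal free-standing sketch of the same pattern against the raw API; transition_for_sampling is an illustrative helper, not part of libplacebo, and assumes a valid command buffer and image:

#include <vulkan/vulkan.h>

// Transition an image from UNDEFINED to SHADER_READ_ONLY_OPTIMAL with the
// synchronization2 API, analogous to what vk_tex_barrier() records through
// its vk_cmd_barrier() wrapper.
static void transition_for_sampling(VkCommandBuffer cmd, VkImage image)
{
    const VkImageMemoryBarrier2 barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
        .srcStageMask = VK_PIPELINE_STAGE_2_NONE,       // no prior work to wait on
        .srcAccessMask = 0,
        .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
        .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,         // discards previous contents
        .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, // no ownership transfer
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .levelCount = 1,
            .layerCount = 1,
        },
    };

    const VkDependencyInfo dep = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .imageMemoryBarrierCount = 1,
        .pImageMemoryBarriers = &barrier,
    };

    vkCmdPipelineBarrier2(cmd, &dep);
}

Note the UNDEFINED old layout: exactly as in the may_invalidate path above, it tells the driver the previous contents may be discarded.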
static void vk_tex_destroy(pl_gpu gpu, struct pl_tex_t *tex)
{
    if (!tex)
        return;

    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);

    vk_sync_deref(gpu, tex_vk->ext_sync);
    vk->DestroyFramebuffer(vk->dev, tex_vk->framebuffer, PL_VK_ALLOC);
    vk->DestroyImageView(vk->dev, tex_vk->view, PL_VK_ALLOC);
    for (int i = 0; i < tex_vk->num_planes; i++)
        vk_tex_deref(gpu, tex->planes[i]);
    if (!tex_vk->external_img) {
        vk->DestroyImage(vk->dev, tex_vk->img, PL_VK_ALLOC);
        vk_malloc_free(vk->ma, &tex_vk->mem);
    }

    pl_free(tex);
}

void vk_tex_deref(pl_gpu gpu, pl_tex tex)
{
    if (!tex)
        return;

    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    if (pl_rc_deref(&tex_vk->rc))
        vk_tex_destroy(gpu, (struct pl_tex_t *) tex);
}

// Initializes non-VkImage values like the image view, framebuffers, etc.
static bool vk_init_image(pl_gpu gpu, pl_tex tex, pl_debug_tag debug_tag)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;

    const struct pl_tex_params *params = &tex->params;
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    pl_assert(tex_vk->img);
    PL_VK_NAME(IMAGE, tex_vk->img, debug_tag);
    pl_rc_init(&tex_vk->rc);
    if (tex_vk->num_planes)
        return true;
    tex_vk->layout = VK_IMAGE_LAYOUT_UNDEFINED;
    tex_vk->transfer_queue = GRAPHICS;
    tex_vk->qf = VK_QUEUE_FAMILY_IGNORED; // will be set on first use, if needed

    // Always use the transfer pool if available, for efficiency
    if ((params->host_writable || params->host_readable) && vk->pool_transfer)
        tex_vk->transfer_queue = TRANSFER;

    // For emulated formats: force usage of the compute queue, because we
    // can't properly track cross-queue dependencies for buffers (yet?)
    if (params->format->emulated)
        tex_vk->transfer_queue = COMPUTE;

    bool ret = false;
    VkRenderPass dummyPass = VK_NULL_HANDLE;

    if (params->sampleable || params->renderable || params->storable) {
        static const VkImageViewType viewType[] = {
            [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
            [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
            [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
        };

        const VkImageViewCreateInfo vinfo = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = tex_vk->img,
            .viewType = viewType[tex_vk->type],
            .format = tex_vk->img_fmt,
            .subresourceRange = {
                .aspectMask = tex_vk->aspect,
                .levelCount = 1,
                .layerCount = 1,
            },
        };

        VK(vk->CreateImageView(vk->dev, &vinfo, PL_VK_ALLOC, &tex_vk->view));
        PL_VK_NAME(IMAGE_VIEW, tex_vk->view, debug_tag);
    }

    if (params->renderable) {
        // Framebuffers need to be created against a specific render pass
        // layout, so we need to temporarily create a skeleton/dummy render
        // pass for vulkan to figure out the compatibility
        VkRenderPassCreateInfo rinfo = {
            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
            .attachmentCount = 1,
            .pAttachments = &(VkAttachmentDescription) {
                .format = tex_vk->img_fmt,
                .samples = VK_SAMPLE_COUNT_1_BIT,
                .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
                .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
                .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            },
            .subpassCount = 1,
            .pSubpasses = &(VkSubpassDescription) {
                .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
                .colorAttachmentCount = 1,
                .pColorAttachments = &(VkAttachmentReference) {
                    .attachment = 0,
                    .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                },
            },
        };

        VK(vk->CreateRenderPass(vk->dev, &rinfo, PL_VK_ALLOC, &dummyPass));

        VkFramebufferCreateInfo finfo = {
            .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
            .renderPass = dummyPass,
            .attachmentCount = 1,
            .pAttachments = &tex_vk->view,
            .width = tex->params.w,
            .height = tex->params.h,
            .layers = 1,
        };

        if (finfo.width > vk->props.limits.maxFramebufferWidth ||
            finfo.height > vk->props.limits.maxFramebufferHeight)
        {
            PL_ERR(gpu, "Framebuffer of size %dx%d exceeds the maximum allowed "
                   "dimensions: %dx%d", finfo.width, finfo.height,
                   vk->props.limits.maxFramebufferWidth,
                   vk->props.limits.maxFramebufferHeight);
            goto error;
        }

        VK(vk->CreateFramebuffer(vk->dev, &finfo, PL_VK_ALLOC,
                                 &tex_vk->framebuffer));
        PL_VK_NAME(FRAMEBUFFER, tex_vk->framebuffer, debug_tag);
    }

    ret = true;

error:
    vk->DestroyRenderPass(vk->dev, dummyPass, PL_VK_ALLOC);
    return ret;
}
pl_tex vk_tex_create(pl_gpu gpu, const struct pl_tex_params *params)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;

    enum pl_handle_type handle_type = params->export_handle |
                                      params->import_handle;
    VkExternalMemoryHandleTypeFlagBitsKHR vk_handle_type = vk_mem_handle_type(handle_type);

    struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk);
    pl_fmt fmt = params->format;
    tex->params = *params;
    tex->params.initial_data = NULL;
    tex->sampler_type = PL_SAMPLER_NORMAL;

    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    struct pl_fmt_vk *fmtp = PL_PRIV(fmt);
    tex_vk->img_fmt = fmtp->vk_fmt->tfmt;
    tex_vk->num_planes = fmt->num_planes;
    for (int i = 0; i < tex_vk->num_planes; i++)
        tex_vk->aspect |= VK_IMAGE_ASPECT_PLANE_0_BIT << i;
    tex_vk->aspect = PL_DEF(tex_vk->aspect, VK_IMAGE_ASPECT_COLOR_BIT);

    switch (pl_tex_params_dimension(*params)) {
    case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
    case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
    case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
    }

    if (fmt->emulated) {
        tex_vk->texel_fmt = pl_find_fmt(gpu, fmt->type, 1, 0,
                                        fmt->host_bits[0],
                                        PL_FMT_CAP_TEXEL_UNIFORM);
        if (!tex_vk->texel_fmt) {
            PL_ERR(gpu, "Failed picking texel format for emulated texture!");
            goto error;
        }

        // Our format emulation requires storage image support. In order to
        // make a bunch of checks happy, just mark it off as storable (and also
        // enable VK_IMAGE_USAGE_STORAGE_BIT, which we do below)
        tex->params.storable = true;
    }

    if (fmtp->blit_emulated) {
        // Enable what's required for sampling
        tex->params.sampleable = fmt->caps & PL_FMT_CAP_SAMPLEABLE;
        tex->params.storable = true;
    }

    // Blit emulation on planar textures requires storage
    if ((params->blit_src || params->blit_dst) && tex_vk->num_planes)
        tex->params.storable = true;

    VkImageUsageFlags usage = 0;
    VkImageCreateFlags flags = 0;
    if (tex->params.sampleable)
        usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
    if (tex->params.renderable)
        usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    if (tex->params.storable)
        usage |= VK_IMAGE_USAGE_STORAGE_BIT;
    if (tex->params.host_readable || tex->params.blit_src)
        usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
    if (tex->params.host_writable || tex->params.blit_dst || params->initial_data)
        usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;

    if (!usage) {
        // Vulkan requires images have at least *some* image usage set, but our
        // API is perfectly happy with a (useless) image. So just put
        // VK_IMAGE_USAGE_TRANSFER_DST_BIT, since this is harmless.
        usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    }

    if (tex_vk->num_planes) {
        flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
                 VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
    }

    // FIXME: Since we can't keep track of queue family ownership properly,
    // and we don't know in advance what types of queue families this image
    // will belong to, we're forced to share all of our images between all
    // command pools.
    uint32_t qfs[3] = {0};
    pl_assert(vk->pools.num <= PL_ARRAY_SIZE(qfs));
    for (int i = 0; i < vk->pools.num; i++)
        qfs[i] = vk->pools.elem[i]->qf;

    VkImageDrmFormatModifierExplicitCreateInfoEXT drm_explicit = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
        .drmFormatModifier = params->shared_mem.drm_format_mod,
        .drmFormatModifierPlaneCount = 1,
        .pPlaneLayouts = &(VkSubresourceLayout) {
            .rowPitch = PL_DEF(params->shared_mem.stride_w, params->w),
            .depthPitch = params->d ? PL_DEF(params->shared_mem.stride_h, params->h) : 0,
            .offset = params->shared_mem.offset,
        },
    };

#ifdef VK_EXT_metal_objects
    VkImportMetalTextureInfoEXT import_metal_tex = {
        .sType = VK_STRUCTURE_TYPE_IMPORT_METAL_TEXTURE_INFO_EXT,
        .plane = VK_IMAGE_ASPECT_PLANE_0_BIT << params->shared_mem.plane,
    };

    VkImportMetalIOSurfaceInfoEXT import_iosurface = {
        .sType = VK_STRUCTURE_TYPE_IMPORT_METAL_IO_SURFACE_INFO_EXT,
    };
#endif

    VkImageDrmFormatModifierListCreateInfoEXT drm_list = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
        .drmFormatModifierCount = fmt->num_modifiers,
        .pDrmFormatModifiers = fmt->modifiers,
    };

    VkExternalMemoryImageCreateInfoKHR ext_info = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR,
        .handleTypes = vk_handle_type,
    };

    VkImageCreateInfo iinfo = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = vk_handle_type ? &ext_info : NULL,
        .imageType = tex_vk->type,
        .format = tex_vk->img_fmt,
        .extent = (VkExtent3D) {
            .width  = params->w,
            .height = PL_MAX(1, params->h),
            .depth  = PL_MAX(1, params->d)
        },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .usage = usage,
        .flags = flags,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .sharingMode = vk->pools.num > 1 ? VK_SHARING_MODE_CONCURRENT
                                         : VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = vk->pools.num,
        .pQueueFamilyIndices = qfs,
    };

    struct vk_malloc_params mparams = {
        .optimal = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
        .export_handle = params->export_handle,
        .import_handle = params->import_handle,
        .shared_mem = params->shared_mem,
        .debug_tag = params->debug_tag,
    };

    if (params->import_handle == PL_HANDLE_DMA_BUF) {
        vk_link_struct(&iinfo, &drm_explicit);
        iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
        mparams.shared_mem.offset = 0x0; // handled via plane offsets
    }

#ifdef VK_EXT_metal_objects
    if (params->import_handle == PL_HANDLE_MTL_TEX) {
        vk_link_struct(&iinfo, &import_metal_tex);
        import_metal_tex.mtlTexture = params->shared_mem.handle.handle;
    }

    if (params->import_handle == PL_HANDLE_IOSURFACE) {
        vk_link_struct(&iinfo, &import_iosurface);
        import_iosurface.ioSurface = params->shared_mem.handle.handle;
    }
#endif

    if (params->export_handle == PL_HANDLE_DMA_BUF) {
        pl_assert(drm_list.drmFormatModifierCount > 0);
        vk_link_struct(&iinfo, &drm_list);
        iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
    }

    // Double-check physical image format limits and fail if invalid
    VkPhysicalDeviceImageDrmFormatModifierInfoEXT drm_pinfo = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
        .sharingMode = iinfo.sharingMode,
        .queueFamilyIndexCount = iinfo.queueFamilyIndexCount,
        .pQueueFamilyIndices = iinfo.pQueueFamilyIndices,
    };

    VkPhysicalDeviceExternalImageFormatInfoKHR ext_pinfo = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR,
        .handleType = ext_info.handleTypes,
    };

    if (handle_type == PL_HANDLE_DMA_BUF) {
        if (params->import_handle) {
            // On import, we know exactly which format modifier to test
            drm_pinfo.drmFormatModifier = drm_explicit.drmFormatModifier;
        } else {
            // On export, the choice of format modifier is ambiguous, because
            // we offer the implementation a whole list to choose from. In
            // principle, we must check *all* supported drm format modifiers,
            // but in practice it should hopefully suffice to just check one
            drm_pinfo.drmFormatModifier = drm_list.pDrmFormatModifiers[0];
        }
        vk_link_struct(&ext_pinfo, &drm_pinfo);
    }

    VkPhysicalDeviceImageFormatInfo2KHR pinfo = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
        .pNext = vk_handle_type ? &ext_pinfo : NULL,
        .format = iinfo.format,
        .type = iinfo.imageType,
        .tiling = iinfo.tiling,
        .usage = iinfo.usage,
        .flags = iinfo.flags,
    };

    VkExternalImageFormatPropertiesKHR ext_props = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };

    VkImageFormatProperties2KHR props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
        .pNext = vk_handle_type ? &ext_props : NULL,
    };

    VkResult res;
    res = vk->GetPhysicalDeviceImageFormatProperties2KHR(vk->physd, &pinfo, &props);
    if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
        PL_DEBUG(gpu, "Texture creation failed: not supported");
        goto error;
    } else {
        PL_VK_ASSERT(res, "Querying image format properties");
    }

    VkExtent3D max = props.imageFormatProperties.maxExtent;
    if (params->w > max.width || params->h > max.height || params->d > max.depth)
    {
        PL_ERR(gpu, "Requested image size %dx%dx%d exceeds the maximum allowed "
               "dimensions %dx%dx%d for vulkan image format %x",
               params->w, params->h, params->d, max.width, max.height, max.depth,
               (unsigned) iinfo.format);
        goto error;
    }

    // Ensure the handle type is supported
    if (vk_handle_type) {
        bool ok = vk_external_mem_check(vk, &ext_props.externalMemoryProperties,
                                        handle_type, params->import_handle);
        if (!ok) {
            PL_ERR(gpu, "Requested handle type is not compatible with the "
                   "specified combination of image parameters. Possibly the "
                   "handle type is unsupported altogether?");
            goto error;
        }
    }

    VK(vk->CreateImage(vk->dev, &iinfo, PL_VK_ALLOC, &tex_vk->img));
    tex_vk->usage_flags = iinfo.usage;

    VkMemoryDedicatedRequirements ded_reqs = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR,
    };

    VkMemoryRequirements2 reqs = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR,
        .pNext = &ded_reqs,
    };

    VkImageMemoryRequirementsInfo2 req_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR,
        .image = tex_vk->img,
    };

    vk->GetImageMemoryRequirements2(vk->dev, &req_info, &reqs);
    mparams.reqs = reqs.memoryRequirements;
    if (ded_reqs.prefersDedicatedAllocation) {
        mparams.ded_image = tex_vk->img;
        if (vk_mem_handle_type(params->import_handle))
            mparams.shared_mem.size = reqs.memoryRequirements.size;
    }

    const char *debug_tag = params->debug_tag ? params->debug_tag :
                            params->import_handle ? "imported" : "created";

    if (!params->import_handle || vk_mem_handle_type(params->import_handle)) {
        struct vk_memslice *mem = &tex_vk->mem;
        if (!vk_malloc_slice(vk->ma, mem, &mparams))
            goto error;

        VK(vk->BindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
    }

    static const char * const plane_names[4] = {
        "plane 0", "plane 1", "plane 2", "plane 3",
    };

    if (tex_vk->num_planes) {
        for (int i = 0; i < tex_vk->num_planes; i++) {
            struct pl_tex_t *plane;

            pl_assert(tex_vk->type == VK_IMAGE_TYPE_2D);
            plane = (struct pl_tex_t *) pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
                .image = tex_vk->img,
                .aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << i,
                .width = PL_RSHIFT_UP(tex->params.w, fmt->planes[i].shift_x),
                .height = PL_RSHIFT_UP(tex->params.h, fmt->planes[i].shift_y),
                .format = fmtp->vk_fmt->pfmt[i].fmt,
                .usage = usage,
                .user_data = params->user_data,
                .debug_tag = PL_DEF(params->debug_tag, plane_names[i]),
            ));
            if (!plane)
                goto error;
            plane->parent = tex;
            tex->planes[i] = plane;
            tex_vk->planes[i] = PL_PRIV(plane);
            tex_vk->planes[i]->held = false;
            tex_vk->planes[i]->layout = tex_vk->layout;
        }

        // Explicitly mask out all usage flags from planar parent images
        pl_assert(!fmt->caps);
        tex->params.sampleable = false;
        tex->params.renderable = false;
        tex->params.storable = false;
        tex->params.blit_src = false;
        tex->params.blit_dst = false;
        tex->params.host_writable = false;
        tex->params.host_readable = false;
    }

    if (!vk_init_image(gpu, tex, debug_tag))
        goto error;

    if (params->export_handle)
        tex->shared_mem = tex_vk->mem.shared_mem;

    if (params->export_handle == PL_HANDLE_DMA_BUF) {
        if (vk->GetImageDrmFormatModifierPropertiesEXT) {

            // Query the DRM format modifier and plane layout from the driver
            VkImageDrmFormatModifierPropertiesEXT mod_props = {
                .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
            };

            VK(vk->GetImageDrmFormatModifierPropertiesEXT(vk->dev, tex_vk->img, &mod_props));
            tex->shared_mem.drm_format_mod = mod_props.drmFormatModifier;

            VkSubresourceLayout layout = {0};
            VkImageSubresource plane = {
                .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
            };

            vk->GetImageSubresourceLayout(vk->dev, tex_vk->img, &plane, &layout);
            if (layout.offset != 0) {
                PL_ERR(gpu, "Exported DRM plane 0 has nonzero offset %zu, "
                       "this should never happen! Erroring for safety...",
                       (size_t) layout.offset);
                goto error;
            }
            tex->shared_mem.stride_w = layout.rowPitch;
            tex->shared_mem.stride_h = layout.depthPitch;

        } else {

            // Fallback for no modifiers, just do something stupid.
            tex->shared_mem.drm_format_mod = DRM_FORMAT_MOD_INVALID;
            tex->shared_mem.stride_w = params->w;
            tex->shared_mem.stride_h = params->h;

        }
    }

    if (params->initial_data) {
        struct pl_tex_transfer_params ul_params = {
            .tex = tex,
            .ptr = (void *) params->initial_data,
            .rc = { 0, 0, 0, params->w, params->h, params->d },
        };

        // Since we re-use GPU helpers which require writable images, just fake it
        bool writable = tex->params.host_writable;
        tex->params.host_writable = true;
        if (!pl_tex_upload(gpu, &ul_params))
            goto error;
        tex->params.host_writable = writable;
    }

    return tex;

error:
    vk_tex_destroy(gpu, tex);
    return NULL;
}
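The "double-check physical image format limits" step above is the generic pattern for validating an image configuration before creation. A standalone sketch using the core Vulkan 1.1 spelling of the same entry point (image_config_supported is an illustrative helper, not libplacebo code):

#include <stdbool.h>
#include <vulkan/vulkan.h>

// Returns true if `physd` supports creating an optimally-tiled 2D image with
// the given format and usage, and (optionally) reports its maximum extent.
static bool image_config_supported(VkPhysicalDevice physd, VkFormat format,
                                   VkImageUsageFlags usage, VkExtent3D *out_max)
{
    const VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .format = format,
        .type = VK_IMAGE_TYPE_2D,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .usage = usage,
    };

    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
    };

    // VK_ERROR_FORMAT_NOT_SUPPORTED is an expected, recoverable result here
    VkResult res = vkGetPhysicalDeviceImageFormatProperties2(physd, &pinfo, &props);
    if (res != VK_SUCCESS)
        return false;

    if (out_max)
        *out_max = props.imageFormatProperties.maxExtent;
    return true;
}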
void vk_tex_invalidate(pl_gpu gpu, pl_tex tex)
{
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    tex_vk->may_invalidate = true;
    for (int i = 0; i < tex_vk->num_planes; i++)
        tex_vk->planes[i]->may_invalidate = true;
}

static bool tex_clear_fallback(pl_gpu gpu, pl_tex tex,
                               const union pl_clear_color color)
{
    pl_tex pixel = pl_tex_create(gpu, pl_tex_params(
        .w = 1,
        .h = 1,
        .format = tex->params.format,
        .storable = true,
        .blit_src = true,
        .blit_dst = true,
    ));
    if (!pixel)
        return false;

    pl_tex_clear_ex(gpu, pixel, color);

    pl_assert(tex->params.storable);
    pl_tex_blit(gpu, pl_tex_blit_params(
        .src = pixel,
        .dst = tex,
        .sample_mode = PL_TEX_SAMPLE_NEAREST,
    ));

    pl_tex_destroy(gpu, &pixel);
    return true;
}

void vk_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);

    if (tex_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT) {
        if (!tex_clear_fallback(gpu, tex, color)) {
            PL_ERR(gpu, "Failed clearing imported planar image: color aspect "
                   "clears disallowed by spec and no shader fallback "
                   "available");
        }
        return;
    }

    struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS);
    if (!cmd)
        return;

    vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_CLEAR_BIT,
                   VK_ACCESS_2_TRANSFER_WRITE_BIT,
                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                   VK_QUEUE_FAMILY_IGNORED);

    pl_static_assert(sizeof(VkClearColorValue) == sizeof(union pl_clear_color));
    const VkClearColorValue *clearColor = (const VkClearColorValue *) &color;

    pl_assert(tex_vk->aspect == VK_IMAGE_ASPECT_COLOR_BIT);
    static const VkImageSubresourceRange range = {
        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        .levelCount = 1,
        .layerCount = 1,
    };

    vk->CmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->layout,
                           clearColor, 1, &range);

    CMD_FINISH(&cmd);
}

void vk_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    struct pl_tex_vk *src_vk = PL_PRIV(params->src);
    struct pl_tex_vk *dst_vk = PL_PRIV(params->dst);
    struct pl_fmt_vk *src_fmtp = PL_PRIV(params->src->params.format);
    struct pl_fmt_vk *dst_fmtp = PL_PRIV(params->dst->params.format);
    bool blit_emulated = src_fmtp->blit_emulated || dst_fmtp->blit_emulated;
    bool planar_fallback = src_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT ||
                           dst_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT;

    pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc;
    bool requires_scaling = !pl_rect3d_eq(src_rc, dst_rc);
    if ((requires_scaling && blit_emulated) || planar_fallback) {
        if (!pl_tex_blit_compute(gpu, params))
            PL_ERR(gpu, "Failed emulating texture blit, incompatible textures?");
        return;
    }

    struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS);
    if (!cmd)
        return;

    // When the blit operation doesn't require scaling, we can use the more
    // efficient vkCmdCopyImage instead of vkCmdBlitImage
    if (!requires_scaling) {
        vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_READ_BIT,
                       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                       VK_QUEUE_FAMILY_IGNORED);

        vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_WRITE_BIT,
                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                       VK_QUEUE_FAMILY_IGNORED);

        pl_rect3d_normalize(&src_rc);

        VkImageCopy region = {
            .srcSubresource = {
                .aspectMask = src_vk->aspect,
                .layerCount = 1,
            },
            .dstSubresource = {
                .aspectMask = dst_vk->aspect,
                .layerCount = 1,
            },
            .srcOffset = {src_rc.x0, src_rc.y0, src_rc.z0},
            .dstOffset = {src_rc.x0, src_rc.y0, src_rc.z0},
            .extent = {
                pl_rect_w(src_rc),
                pl_rect_h(src_rc),
                pl_rect_d(src_rc),
            },
        };

        vk->CmdCopyImage(cmd->buf, src_vk->img, src_vk->layout,
                         dst_vk->img, dst_vk->layout, 1, &region);
    } else {
        vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_BLIT_BIT,
                       VK_ACCESS_2_TRANSFER_READ_BIT,
                       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                       VK_QUEUE_FAMILY_IGNORED);

        vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_BLIT_BIT,
                       VK_ACCESS_2_TRANSFER_WRITE_BIT,
                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                       VK_QUEUE_FAMILY_IGNORED);

        VkImageBlit region = {
            .srcSubresource = {
                .aspectMask = src_vk->aspect,
                .layerCount = 1,
            },
            .dstSubresource = {
                .aspectMask = dst_vk->aspect,
                .layerCount = 1,
            },
            .srcOffsets = {{src_rc.x0, src_rc.y0, src_rc.z0},
                           {src_rc.x1, src_rc.y1, src_rc.z1}},
            .dstOffsets = {{dst_rc.x0, dst_rc.y0, dst_rc.z0},
                           {dst_rc.x1, dst_rc.y1, dst_rc.z1}},
        };

        static const VkFilter filters[PL_TEX_SAMPLE_MODE_COUNT] = {
            [PL_TEX_SAMPLE_NEAREST] = VK_FILTER_NEAREST,
            [PL_TEX_SAMPLE_LINEAR]  = VK_FILTER_LINEAR,
        };

        vk->CmdBlitImage(cmd->buf, src_vk->img, src_vk->layout,
                         dst_vk->img, dst_vk->layout, 1, &region,
                         filters[params->sample_mode]);
    }

    CMD_FINISH(&cmd);
}
// Determine the best queue type to perform a buffer<->image copy on
static enum queue_type vk_img_copy_queue(pl_gpu gpu, pl_tex tex,
                                         const struct VkBufferImageCopy *region)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;

    const struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    enum queue_type queue = tex_vk->transfer_queue;
    if (queue != TRANSFER)
        return queue;

    VkExtent3D alignment = vk->pool_transfer->props.minImageTransferGranularity;

    enum queue_type fallback = GRAPHICS;
    if (gpu->limits.compute_queues > gpu->limits.fragment_queues)
        fallback = COMPUTE; // prefer async compute queue

    int tex_w = PL_DEF(tex->params.w, 1),
        tex_h = PL_DEF(tex->params.h, 1),
        tex_d = PL_DEF(tex->params.d, 1);

    bool full_w = region->imageOffset.x + region->imageExtent.width  == tex_w,
         full_h = region->imageOffset.y + region->imageExtent.height == tex_h,
         full_d = region->imageOffset.z + region->imageExtent.depth  == tex_d;

    if (alignment.width) {

        bool unaligned = false;
        unaligned |= region->imageOffset.x % alignment.width;
        unaligned |= region->imageOffset.y % alignment.height;
        unaligned |= region->imageOffset.z % alignment.depth;
        unaligned |= (region->imageExtent.width  % alignment.width)  && !full_w;
        unaligned |= (region->imageExtent.height % alignment.height) && !full_h;
        unaligned |= (region->imageExtent.depth  % alignment.depth)  && !full_d;

        return unaligned ? fallback : queue;

    } else {

        // an alignment of {0} means the copy must span the entire image
        bool unaligned = false;
        unaligned |= region->imageOffset.x || !full_w;
        unaligned |= region->imageOffset.y || !full_h;
        unaligned |= region->imageOffset.z || !full_d;

        return unaligned ? fallback : queue;

    }
}
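The granularity consulted above originates from the queue family properties of the transfer queue. A sketch of the underlying query (raw Vulkan; query_transfer_granularity is an illustrative helper):

#include <stdlib.h>
#include <vulkan/vulkan.h>

// Query the minimum image transfer granularity of queue family `qf`. Copies
// submitted to that family must have offsets and extents aligned to this
// value, with {0,0,0} meaning only whole-image copies are allowed.
static VkExtent3D query_transfer_granularity(VkPhysicalDevice physd, uint32_t qf)
{
    VkExtent3D granularity = {0};
    uint32_t num = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(physd, &num, NULL);

    VkQueueFamilyProperties *props = calloc(num, sizeof(*props));
    if (!props || qf >= num) {
        free(props);
        return granularity;
    }

    vkGetPhysicalDeviceQueueFamilyProperties(physd, &num, props);
    granularity = props[qf].minImageTransferGranularity;
    free(props);
    return granularity;
}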
static void tex_xfer_cb(void *ctx, void *arg)
{
    void (*fun)(void *priv) = ctx;
    fun(arg);
}

bool vk_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    pl_tex tex = params->tex;
    pl_fmt fmt = tex->params.format;
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    struct pl_tex_transfer_params *slices = NULL;
    int num_slices = 0;

    if (!params->buf)
        return pl_tex_upload_pbo(gpu, params);

    pl_buf buf = params->buf;
    struct pl_buf_vk *buf_vk = PL_PRIV(buf);
    pl_rect3d rc = params->rc;
    const size_t size = pl_tex_transfer_size(params);
    const size_t buf_offset = buf_vk->mem.offset + params->buf_offset;
    bool unaligned = buf_offset % fmt->texel_size;
    if (unaligned)
        PL_TRACE(gpu, "vk_tex_upload: unaligned transfer (slow path)");

    if (fmt->emulated || unaligned) {

        // Create all slice buffers first, to early-fail if OOM, and to avoid
        // blocking unnecessarily on waiting for these buffers to get read from
        num_slices = pl_tex_transfer_slices(gpu, tex_vk->texel_fmt, params, &slices);
        for (int i = 0; i < num_slices; i++) {
            slices[i].buf = pl_buf_create(gpu, pl_buf_params(
                .memory_type = PL_BUF_MEM_DEVICE,
                .format = tex_vk->texel_fmt,
                .size = pl_tex_transfer_size(&slices[i]),
                .storable = fmt->emulated,
            ));

            if (!slices[i].buf) {
                PL_ERR(gpu, "Failed creating buffer for tex upload fallback!");
                num_slices = i; // only clean up buffers up to here
                goto error;
            }
        }

        // All temporary buffers successfully created, begin copying source data
        struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue,
                                             params->timer);
        if (!cmd)
            goto error;

        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size,
                       false);

        for (int i = 0; i < num_slices; i++) {
            pl_buf slice = slices[i].buf;
            struct pl_buf_vk *slice_vk = PL_PRIV(slice);
            vk_buf_barrier(gpu, cmd, slice, VK_PIPELINE_STAGE_2_COPY_BIT,
                           VK_ACCESS_2_TRANSFER_WRITE_BIT, 0, slice->params.size,
                           false);

            vk->CmdCopyBuffer(cmd->buf, buf_vk->mem.buf, slice_vk->mem.buf, 1, &(VkBufferCopy) {
                .srcOffset = buf_vk->mem.offset + slices[i].buf_offset,
                .dstOffset = slice_vk->mem.offset,
                .size = slice->params.size,
            });
        }

        if (params->callback)
            vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);

        bool ok = CMD_FINISH(&cmd);

        // Finally, dispatch the (texel) upload asynchronously. We can already
        // fire the callback at the completion of the previous command, because
        // these temporary buffers hold persistent copies of the data
        for (int i = 0; i < num_slices; i++) {
            if (ok) {
                slices[i].buf_offset = 0;
                ok = fmt->emulated ? pl_tex_upload_texel(gpu, &slices[i])
                                   : pl_tex_upload(gpu, &slices[i]);
            }
            pl_buf_destroy(gpu, &slices[i].buf);
        }

        pl_free(slices);
        return ok;

    } else {

        pl_assert(fmt->texel_align == fmt->texel_size);
        const VkBufferImageCopy region = {
            .bufferOffset = buf_offset,
            .bufferRowLength = params->row_pitch / fmt->texel_size,
            .bufferImageHeight = params->depth_pitch / params->row_pitch,
            .imageOffset = { rc.x0, rc.y0, rc.z0 },
            .imageExtent = { rc.x1, rc.y1, rc.z1 },
            .imageSubresource = {
                .aspectMask = tex_vk->aspect,
                .layerCount = 1,
            },
        };

        enum queue_type queue = vk_img_copy_queue(gpu, tex, &region);
        struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer);
        if (!cmd)
            goto error;

        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size,
                       false);
        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_WRITE_BIT,
                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                       VK_QUEUE_FAMILY_IGNORED);
        vk->CmdCopyBufferToImage(cmd->buf, buf_vk->mem.buf, tex_vk->img,
                                 tex_vk->layout, 1, &region);

        if (params->callback)
            vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);

        return CMD_FINISH(&cmd);
    }

    pl_unreachable();

error:
    for (int i = 0; i < num_slices; i++)
        pl_buf_destroy(gpu, &slices[i].buf);
    pl_free(slices);
    return false;
}
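The bufferRowLength/bufferImageHeight conversion deserves spelling out: Vulkan measures them in texels and rows, not bytes, which is why the code divides the byte pitches through. A worked sketch with concrete numbers (illustrative, assuming tightly packed RGBA8 data):

#include <assert.h>
#include <vulkan/vulkan.h>

// Build a VkBufferImageCopy for a tightly packed 1920x1080 RGBA8 image.
// row_pitch and depth_pitch are in bytes; Vulkan wants texel/row counts.
static VkBufferImageCopy packed_copy_region(void)
{
    const size_t texel_size = 4;                  // RGBA8
    const size_t row_pitch = 1920 * texel_size;   // 7680 bytes per row
    const size_t depth_pitch = row_pitch * 1080;  // one full 2D slice

    assert(row_pitch % texel_size == 0);
    assert(depth_pitch % row_pitch == 0);

    return (VkBufferImageCopy) {
        .bufferOffset = 0,
        .bufferRowLength = row_pitch / texel_size,     // 1920 texels
        .bufferImageHeight = depth_pitch / row_pitch,  // 1080 rows
        .imageOffset = { 0, 0, 0 },
        .imageExtent = { 1920, 1080, 1 },
        .imageSubresource = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .layerCount = 1,
        },
    };
}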
bool vk_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    pl_tex tex = params->tex;
    pl_fmt fmt = tex->params.format;
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    struct pl_tex_transfer_params *slices = NULL;
    int num_slices = 0;

    if (!params->buf)
        return pl_tex_download_pbo(gpu, params);

    pl_buf buf = params->buf;
    struct pl_buf_vk *buf_vk = PL_PRIV(buf);
    pl_rect3d rc = params->rc;
    const size_t size = pl_tex_transfer_size(params);
    const size_t buf_offset = buf_vk->mem.offset + params->buf_offset;
    bool unaligned = buf_offset % fmt->texel_size;
    if (unaligned)
        PL_TRACE(gpu, "vk_tex_download: unaligned transfer (slow path)");

    if (fmt->emulated || unaligned) {

        num_slices = pl_tex_transfer_slices(gpu, tex_vk->texel_fmt, params, &slices);
        for (int i = 0; i < num_slices; i++) {
            slices[i].buf = pl_buf_create(gpu, pl_buf_params(
                .memory_type = PL_BUF_MEM_DEVICE,
                .format = tex_vk->texel_fmt,
                .size = pl_tex_transfer_size(&slices[i]),
                .storable = fmt->emulated,
            ));

            if (!slices[i].buf) {
                PL_ERR(gpu, "Failed creating buffer for tex download fallback!");
                num_slices = i;
                goto error;
            }
        }

        for (int i = 0; i < num_slices; i++) {
            // Restore buffer offset after downloading into temporary buffer,
            // because we still need to copy the data from the temporary buffer
            // into this offset in the original buffer
            const size_t tmp_offset = slices[i].buf_offset;
            slices[i].buf_offset = 0;
            bool ok = fmt->emulated ? pl_tex_download_texel(gpu, &slices[i])
                                    : pl_tex_download(gpu, &slices[i]);
            slices[i].buf_offset = tmp_offset;
            if (!ok)
                goto error;
        }

        // Finally, download into the user buffer
        struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue, params->timer);
        if (!cmd)
            goto error;

        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size,
                       false);

        for (int i = 0; i < num_slices; i++) {
            pl_buf slice = slices[i].buf;
            struct pl_buf_vk *slice_vk = PL_PRIV(slice);
            vk_buf_barrier(gpu, cmd, slice, VK_PIPELINE_STAGE_2_COPY_BIT,
                           VK_ACCESS_2_TRANSFER_READ_BIT, 0, slice->params.size,
                           false);

            vk->CmdCopyBuffer(cmd->buf, slice_vk->mem.buf, buf_vk->mem.buf, 1, &(VkBufferCopy) {
                .srcOffset = slice_vk->mem.offset,
                .dstOffset = buf_vk->mem.offset + slices[i].buf_offset,
                .size = slice->params.size,
            });

            pl_buf_destroy(gpu, &slices[i].buf);
        }

        vk_buf_flush(gpu, cmd, buf, params->buf_offset, size);

        if (params->callback)
            vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);

        pl_free(slices);
        return CMD_FINISH(&cmd);

    } else {

        pl_assert(params->row_pitch % fmt->texel_size == 0);
        pl_assert(params->depth_pitch % params->row_pitch == 0);
        const VkBufferImageCopy region = {
            .bufferOffset = buf_offset,
            .bufferRowLength = params->row_pitch / fmt->texel_size,
            .bufferImageHeight = params->depth_pitch / params->row_pitch,
            .imageOffset = { rc.x0, rc.y0, rc.z0 },
            .imageExtent = { rc.x1, rc.y1, rc.z1 },
            .imageSubresource = {
                .aspectMask = tex_vk->aspect,
                .layerCount = 1,
            },
        };

        enum queue_type queue = vk_img_copy_queue(gpu, tex, &region);

        struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer);
        if (!cmd)
            goto error;

        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size,
                       false);
        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT,
                       VK_ACCESS_2_TRANSFER_READ_BIT,
                       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                       VK_QUEUE_FAMILY_IGNORED);
        vk->CmdCopyImageToBuffer(cmd->buf, tex_vk->img, tex_vk->layout,
                                 buf_vk->mem.buf, 1, &region);
        vk_buf_flush(gpu, cmd, buf, params->buf_offset, size);

        if (params->callback)
            vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);

        return CMD_FINISH(&cmd);
    }

    pl_unreachable();

error:
    for (int i = 0; i < num_slices; i++)
        pl_buf_destroy(gpu, &slices[i].buf);
    pl_free(slices);
    return false;
}
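From the caller's side, a download is just a pl_tex_transfer_params aimed at host memory; the pl_buf path above is only taken when a buffer is supplied. A usage sketch, assuming a 2D texture and that a zero-initialized rc defaults to the whole texture (download_texture is an illustrative helper):

#include <stdlib.h>
#include <libplacebo/gpu.h>

// Read back the full contents of a 2D texture into host memory. Returns the
// malloc'd buffer on success (caller frees), NULL on failure.
static void *download_texture(pl_gpu gpu, pl_tex tex)
{
    const size_t texel_size = tex->params.format->texel_size;
    const size_t row_pitch = tex->params.w * texel_size; // tightly packed
    void *data = malloc(row_pitch * tex->params.h);
    if (!data)
        return NULL;

    struct pl_tex_transfer_params tparams = {
        .tex = tex,
        .ptr = data,            // host destination instead of a pl_buf
        .row_pitch = row_pitch,
    };

    if (!pl_tex_download(gpu, &tparams)) {
        free(data);
        return NULL;
    }
    return data;
}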
bool vk_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);

    // Opportunistically check if we can re-use this texture without flush
    vk_poll_commands(vk, 0);
    if (pl_rc_count(&tex_vk->rc) == 1)
        goto skip_blocking;

    // Otherwise, we're forced to submit any queued commands so that the user
    // is guaranteed to see progress eventually, even if they call this in a
    // loop
    CMD_SUBMIT(NULL);
    vk_poll_commands(vk, timeout);
    if (pl_rc_count(&tex_vk->rc) > 1)
        return true;

    // fall through
skip_blocking:
    for (int i = 0; i < tex_vk->num_planes; i++) {
        if (vk_tex_poll(gpu, tex->planes[i], timeout))
            return true;
    }

    return false;
}
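A caller-side sketch of the intended usage, assuming pl_tex_poll's documented semantics (it returns true while the texture is still in use by the GPU):

#include <stdint.h>
#include <libplacebo/gpu.h>

// Block until `tex` is safe to reuse. A timeout of 0 would instead make
// pl_tex_poll a pure, non-blocking query.
static void wait_for_texture(pl_gpu gpu, pl_tex tex)
{
    while (pl_tex_poll(gpu, tex, UINT64_MAX))
        ; // still in use; each call flushes queued commands and waits
}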
bool vk_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync)
{
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    struct pl_sync_vk *sync_vk = PL_PRIV(sync);

    if (tex_vk->num_planes) {
        PL_ERR(gpu, "`pl_tex_export` cannot be called on planar textures. "
               "Please see `pl_vulkan_hold_ex` for a replacement.");
        return false;
    }

    struct vk_cmd *cmd = CMD_BEGIN(ANY);
    if (!cmd)
        goto error;

    vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_NONE,
                   0, VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_EXTERNAL);

    // Make the next barrier appear as though coming from a different queue
    tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL;

    vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, (pl_vulkan_sem){ sync_vk->wait });
    if (!CMD_SUBMIT(&cmd))
        goto error;

    // Remember the other dependency and hold on to the sync object
    PL_ARRAY_APPEND(tex, tex_vk->ext_deps, (pl_vulkan_sem){ sync_vk->signal });
    pl_rc_ref(&sync_vk->rc);
    tex_vk->ext_sync = sync;
    tex_vk->qf = VK_QUEUE_FAMILY_EXTERNAL;
    return true;

error:
    PL_ERR(gpu, "Failed exporting shared texture!");
    return false;
}
pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params)
{
    pl_fmt fmt = NULL;
    for (int i = 0; i < gpu->num_formats; i++) {
        const struct vk_format **vkfmt = PL_PRIV(gpu->formats[i]);
        if ((*vkfmt)->tfmt == params->format) {
            fmt = gpu->formats[i];
            break;
        }
    }

    if (!fmt) {
        PL_ERR(gpu, "Could not find pl_fmt suitable for wrapped image "
               "with format %s", vk_fmt_name(params->format));
        return NULL;
    }

    VkImageUsageFlags usage = params->usage;
    if (fmt->num_planes)
        usage = 0; // mask capabilities from the base texture

    struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk);
    tex->params = (struct pl_tex_params) {
        .format = fmt,
        .w = params->width,
        .h = params->height,
        .d = params->depth,
        .sampleable    = !!(usage & VK_IMAGE_USAGE_SAMPLED_BIT),
        .renderable    = !!(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
        .storable      = !!(usage & VK_IMAGE_USAGE_STORAGE_BIT),
        .blit_src      = !!(usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
        .blit_dst      = !!(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
        .host_writable = !!(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
        .host_readable = !!(usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
        .user_data = params->user_data,
        .debug_tag = params->debug_tag,
    };

    // Mask out capabilities not permitted by the `pl_fmt`
#define MASK(field, cap)                                                      \
    do {                                                                      \
        if (tex->params.field && !(fmt->caps & cap)) {                        \
            PL_WARN(gpu, "Masking `" #field "` from wrapped texture because " \
                    "the corresponding format '%s' does not support " #cap,   \
                    fmt->name);                                               \
            tex->params.field = false;                                        \
        }                                                                     \
    } while (0)

    MASK(sampleable,    PL_FMT_CAP_SAMPLEABLE);
    MASK(renderable,    PL_FMT_CAP_RENDERABLE);
    MASK(storable,      PL_FMT_CAP_STORABLE);
    MASK(blit_src,      PL_FMT_CAP_BLITTABLE);
    MASK(blit_dst,      PL_FMT_CAP_BLITTABLE);
    MASK(host_readable, PL_FMT_CAP_HOST_READABLE);
#undef MASK

    // For simplicity, explicitly mask out blit emulation for wrapped textures
    struct pl_fmt_vk *fmtp = PL_PRIV(fmt);
    if (fmtp->blit_emulated) {
        tex->params.blit_src = false;
        tex->params.blit_dst = false;
    }

    struct pl_tex_vk *tex_vk = PL_PRIV(tex);
    switch (pl_tex_params_dimension(tex->params)) {
    case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
    case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
    case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
    }
    tex_vk->external_img = true;
    tex_vk->held = !fmt->num_planes;
    tex_vk->img = params->image;
    tex_vk->img_fmt = params->format;
    tex_vk->num_planes = fmt->num_planes;
    tex_vk->usage_flags = usage;
    tex_vk->aspect = params->aspect;

    if (!tex_vk->aspect) {
        for (int i = 0; i < tex_vk->num_planes; i++)
            tex_vk->aspect |= VK_IMAGE_ASPECT_PLANE_0_BIT << i;
        tex_vk->aspect = PL_DEF(tex_vk->aspect, VK_IMAGE_ASPECT_COLOR_BIT);
    }

    // Blitting to planar images requires fallback via compute shaders
    if (tex_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT) {
        tex->params.blit_src &= tex->params.storable;
        tex->params.blit_dst &= tex->params.storable;
    }

    static const char * const wrapped_plane_names[4] = {
        "wrapped plane 0", "wrapped plane 1", "wrapped plane 2", "wrapped plane 3",
    };

    for (int i = 0; i < tex_vk->num_planes; i++) {
        struct pl_tex_t *plane;
        VkImageAspectFlags aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << i;
        if (!(aspect & tex_vk->aspect)) {
            PL_INFO(gpu, "Not wrapping plane %d due to aspect bit 0x%x not "
                    "being contained in supplied params->aspect 0x%x!",
                    i, (unsigned) aspect, (unsigned) tex_vk->aspect);
            continue;
        }

        pl_assert(tex_vk->type == VK_IMAGE_TYPE_2D);
        plane = (struct pl_tex_t *) pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
            .image = tex_vk->img,
            .aspect = aspect,
            .width = PL_RSHIFT_UP(tex->params.w, fmt->planes[i].shift_x),
            .height = PL_RSHIFT_UP(tex->params.h, fmt->planes[i].shift_y),
            .format = fmtp->vk_fmt->pfmt[i].fmt,
            .usage = params->usage,
            .user_data = params->user_data,
            .debug_tag = PL_DEF(params->debug_tag, wrapped_plane_names[i]),
        ));
        if (!plane)
            goto error;
        plane->parent = tex;
        tex->planes[i] = plane;
        tex_vk->planes[i] = PL_PRIV(plane);
    }

    if (!vk_init_image(gpu, tex, PL_DEF(params->debug_tag, "wrapped")))
        goto error;

    return tex;

error:
    vk_tex_destroy(gpu, tex);
    return NULL;
}

VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, VkFormat *out_format,
                         VkImageUsageFlags *out_flags)
{
    struct pl_tex_vk *tex_vk = PL_PRIV(tex);

    if (out_format)
        *out_format = tex_vk->img_fmt;
    if (out_flags)
        *out_flags = tex_vk->usage_flags;

    return tex_vk->img;
}
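A caller-side sketch of wrapping an externally allocated VkImage (e.g. from a hardware decoder); the image, format, and usage are assumed to come from the external allocator:

#include <libplacebo/vulkan.h>

// Wrap an external VkImage into a pl_tex. Note that non-planar wraps start
// out "held" (as set above) and must be released via pl_vulkan_release_ex()
// before libplacebo may touch the image.
static pl_tex wrap_external_image(pl_gpu gpu, VkImage image, int w, int h,
                                  VkFormat format, VkImageUsageFlags usage)
{
    // Returns NULL if no pl_fmt matches `format`, or plane wrapping failed
    return pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
        .image = image,
        .width = w,
        .height = h,
        .format = format,
        .usage = usage,
    ));
}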
bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params)
{
    struct pl_tex_vk *tex_vk = PL_PRIV(params->tex);
    pl_assert(params->semaphore.sem);

    bool held = tex_vk->held;
    for (int i = 0; i < tex_vk->num_planes; i++)
        held |= tex_vk->planes[i]->held;

    if (held) {
        PL_ERR(gpu, "Attempting to hold an already held image!");
        return false;
    }

    struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS);
    if (!cmd) {
        PL_ERR(gpu, "Failed holding external image!");
        return false;
    }

    VkImageLayout layout = params->layout;
    if (params->out_layout) {
        // For planar images, arbitrarily pick the current image layout of the
        // first plane. This should be fine in practice, since all planes will
        // share the same usage capabilities.
        if (tex_vk->num_planes) {
            layout = tex_vk->planes[0]->layout;
        } else {
            layout = tex_vk->layout;
        }
    }

    bool may_invalidate = true;
    if (!tex_vk->num_planes) {
        may_invalidate &= tex_vk->may_invalidate;
        vk_tex_barrier(gpu, cmd, params->tex, VK_PIPELINE_STAGE_2_NONE,
                       0, layout, params->qf);
    }

    for (int i = 0; i < tex_vk->num_planes; i++) {
        may_invalidate &= tex_vk->planes[i]->may_invalidate;
        vk_tex_barrier(gpu, cmd, params->tex->planes[i],
                       VK_PIPELINE_STAGE_2_NONE, 0, layout, params->qf);
    }

    vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, params->semaphore);
    bool ok = CMD_SUBMIT(&cmd);

    if (!tex_vk->num_planes) {
        tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL;
        tex_vk->held = ok;
    }

    for (int i = 0; i < tex_vk->num_planes; i++) {
        struct pl_tex_vk *plane_vk = tex_vk->planes[i];
        plane_vk->sem.write.queue = plane_vk->sem.read.queue = NULL;
        plane_vk->held = ok;
    }

    if (ok && params->out_layout)
        *params->out_layout = may_invalidate ? VK_IMAGE_LAYOUT_UNDEFINED : layout;

    return ok;
}

void pl_vulkan_release_ex(pl_gpu gpu, const struct pl_vulkan_release_params *params)
{
    struct pl_tex_vk *tex_vk = PL_PRIV(params->tex);
    if (tex_vk->num_planes) {
        struct pl_vulkan_release_params plane_pars = *params;
        for (int i = 0; i < tex_vk->num_planes; i++) {
            plane_pars.tex = params->tex->planes[i];
            pl_vulkan_release_ex(gpu, &plane_pars);
        }
        return;
    }

    if (!tex_vk->held) {
        PL_ERR(gpu, "Attempting to release an unheld image?");
        return;
    }

    if (params->semaphore.sem)
        PL_ARRAY_APPEND(params->tex, tex_vk->ext_deps, params->semaphore);

    tex_vk->qf = params->qf;
    tex_vk->layout = params->layout;
    tex_vk->held = false;
}

bool pl_vulkan_hold(pl_gpu gpu, pl_tex tex, VkImageLayout layout,
                    pl_vulkan_sem sem_out)
{
    return pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
        .tex = tex,
        .layout = layout,
        .semaphore = sem_out,
        .qf = VK_QUEUE_FAMILY_IGNORED,
    ));
}

bool pl_vulkan_hold_raw(pl_gpu gpu, pl_tex tex,
                        VkImageLayout *out_layout,
                        pl_vulkan_sem sem_out)
{
    return pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
        .tex = tex,
        .out_layout = out_layout,
        .semaphore = sem_out,
        .qf = VK_QUEUE_FAMILY_IGNORED,
    ));
}

void pl_vulkan_release(pl_gpu gpu, pl_tex tex, VkImageLayout layout,
                       pl_vulkan_sem sem_in)
{
    pl_vulkan_release_ex(gpu, pl_vulkan_release_params(
        .tex = tex,
        .layout = layout,
        .semaphore = sem_in,
        .qf = VK_QUEUE_FAMILY_IGNORED,
    ));
}