Diffstat (limited to 'src/vulkan/gpu_tex.c'):
 src/vulkan/gpu_tex.c | 1453 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1453 insertions(+), 0 deletions(-)
diff --git a/src/vulkan/gpu_tex.c b/src/vulkan/gpu_tex.c
new file mode 100644
index 0000000..7ab83b7
--- /dev/null
+++ b/src/vulkan/gpu_tex.c
@@ -0,0 +1,1453 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gpu.h"
+
+void vk_tex_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_tex tex,
+ VkPipelineStageFlags2 stage, VkAccessFlags2 access,
+ VkImageLayout layout, uint32_t qf)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ pl_rc_ref(&tex_vk->rc);
+ pl_assert(!tex_vk->held);
+ pl_assert(!tex_vk->num_planes);
+
+ // CONCURRENT images require transitioning to/from IGNORED; EXCLUSIVE
+ // images require transitioning to/from the concrete QF index
+ if (vk->pools.num == 1) {
+ if (tex_vk->qf == VK_QUEUE_FAMILY_IGNORED)
+ tex_vk->qf = cmd->pool->qf;
+ if (qf == VK_QUEUE_FAMILY_IGNORED)
+ qf = cmd->pool->qf;
+ }
+
+ struct vk_sync_scope last;
+ bool is_trans = layout != tex_vk->layout, is_xfer = qf != tex_vk->qf;
+ last = vk_sem_barrier(cmd, &tex_vk->sem, stage, access, is_trans || is_xfer);
+
+ VkImageMemoryBarrier2 barr = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ .srcStageMask = last.stage,
+ .srcAccessMask = last.access,
+ .dstStageMask = stage,
+ .dstAccessMask = access,
+ .oldLayout = tex_vk->layout,
+ .newLayout = layout,
+ .srcQueueFamilyIndex = tex_vk->qf,
+ .dstQueueFamilyIndex = qf,
+ .image = tex_vk->img,
+ .subresourceRange = {
+ .aspectMask = tex_vk->aspect,
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
+
+ if (tex_vk->may_invalidate) {
+ tex_vk->may_invalidate = false;
+ barr.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ }
+
+ if (last.access || is_trans || is_xfer) {
+ vk_cmd_barrier(cmd, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .imageMemoryBarrierCount = 1,
+ .pImageMemoryBarriers = &barr,
+ });
+ }
+
+ tex_vk->qf = qf;
+ tex_vk->layout = layout;
+ vk_cmd_callback(cmd, (vk_cb) vk_tex_deref, gpu, tex);
+
+ for (int i = 0; i < tex_vk->ext_deps.num; i++)
+ vk_cmd_dep(cmd, stage, tex_vk->ext_deps.elem[i]);
+ tex_vk->ext_deps.num = 0;
+
+ if (tex_vk->ext_sync) {
+ vk_cmd_callback(cmd, (vk_cb) vk_sync_deref, gpu, tex_vk->ext_sync);
+ tex_vk->ext_sync = NULL;
+ }
+}
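+
+// Illustrative call sequence (mirrors the transfer helpers below): to
+// transition a texture for a transfer write on whatever queue family the
+// command buffer was recorded on, one would do:
+//
+//     vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT,
+//                    VK_ACCESS_2_TRANSFER_WRITE_BIT,
+//                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+//                    VK_QUEUE_FAMILY_IGNORED);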
+
+static void vk_tex_destroy(pl_gpu gpu, struct pl_tex_t *tex)
+{
+ if (!tex)
+ return;
+
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+
+ vk_sync_deref(gpu, tex_vk->ext_sync);
+ vk->DestroyFramebuffer(vk->dev, tex_vk->framebuffer, PL_VK_ALLOC);
+ vk->DestroyImageView(vk->dev, tex_vk->view, PL_VK_ALLOC);
+ for (int i = 0; i < tex_vk->num_planes; i++)
+ vk_tex_deref(gpu, tex->planes[i]);
+ if (!tex_vk->external_img) {
+ vk->DestroyImage(vk->dev, tex_vk->img, PL_VK_ALLOC);
+ vk_malloc_free(vk->ma, &tex_vk->mem);
+ }
+
+ pl_free(tex);
+}
+
+void vk_tex_deref(pl_gpu gpu, pl_tex tex)
+{
+ if (!tex)
+ return;
+
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ if (pl_rc_deref(&tex_vk->rc))
+ vk_tex_destroy(gpu, (struct pl_tex_t *) tex);
+}
+
+// Initializes non-VkImage values like the image view, framebuffers, etc.
+static bool vk_init_image(pl_gpu gpu, pl_tex tex, pl_debug_tag debug_tag)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+
+ const struct pl_tex_params *params = &tex->params;
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ pl_assert(tex_vk->img);
+ PL_VK_NAME(IMAGE, tex_vk->img, debug_tag);
+ pl_rc_init(&tex_vk->rc);
+ if (tex_vk->num_planes)
+ return true;
+ tex_vk->layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ tex_vk->transfer_queue = GRAPHICS;
+ tex_vk->qf = VK_QUEUE_FAMILY_IGNORED; // will be set on first use, if needed
+
+ // Always use the transfer pool if available, for efficiency
+ if ((params->host_writable || params->host_readable) && vk->pool_transfer)
+ tex_vk->transfer_queue = TRANSFER;
+
+ // For emulated formats: force usage of the compute queue, because we
+ // can't properly track cross-queue dependencies for buffers (yet?)
+ if (params->format->emulated)
+ tex_vk->transfer_queue = COMPUTE;
+
+ bool ret = false;
+ VkRenderPass dummyPass = VK_NULL_HANDLE;
+
+ if (params->sampleable || params->renderable || params->storable) {
+ static const VkImageViewType viewType[] = {
+ [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
+ [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
+ [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
+ };
+
+ const VkImageViewCreateInfo vinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = tex_vk->img,
+ .viewType = viewType[tex_vk->type],
+ .format = tex_vk->img_fmt,
+ .subresourceRange = {
+ .aspectMask = tex_vk->aspect,
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
+
+ VK(vk->CreateImageView(vk->dev, &vinfo, PL_VK_ALLOC, &tex_vk->view));
+ PL_VK_NAME(IMAGE_VIEW, tex_vk->view, debug_tag);
+ }
+
+ if (params->renderable) {
+ // Framebuffers need to be created against a specific render pass
+ // layout, so we need to temporarily create a skeleton/dummy render
+ // pass for Vulkan to figure out the compatibility
+ VkRenderPassCreateInfo rinfo = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = tex_vk->img_fmt,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ };
+
+ VK(vk->CreateRenderPass(vk->dev, &rinfo, PL_VK_ALLOC, &dummyPass));
+
+ VkFramebufferCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = dummyPass,
+ .attachmentCount = 1,
+ .pAttachments = &tex_vk->view,
+ .width = tex->params.w,
+ .height = tex->params.h,
+ .layers = 1,
+ };
+
+ if (finfo.width > vk->props.limits.maxFramebufferWidth ||
+ finfo.height > vk->props.limits.maxFramebufferHeight)
+ {
+ PL_ERR(gpu, "Framebuffer of size %dx%d exceeds the maximum allowed "
+ "dimensions: %dx%d", finfo.width, finfo.height,
+ vk->props.limits.maxFramebufferWidth,
+ vk->props.limits.maxFramebufferHeight);
+ goto error;
+ }
+
+ VK(vk->CreateFramebuffer(vk->dev, &finfo, PL_VK_ALLOC,
+ &tex_vk->framebuffer));
+ PL_VK_NAME(FRAMEBUFFER, tex_vk->framebuffer, debug_tag);
+ }
+
+ ret = true;
+
+error:
+ vk->DestroyRenderPass(vk->dev, dummyPass, PL_VK_ALLOC);
+ return ret;
+}
+
+pl_tex vk_tex_create(pl_gpu gpu, const struct pl_tex_params *params)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+
+ enum pl_handle_type handle_type = params->export_handle |
+ params->import_handle;
+ VkExternalMemoryHandleTypeFlagBitsKHR vk_handle_type = vk_mem_handle_type(handle_type);
+
+ struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk);
+ pl_fmt fmt = params->format;
+ tex->params = *params;
+ tex->params.initial_data = NULL;
+ tex->sampler_type = PL_SAMPLER_NORMAL;
+
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ struct pl_fmt_vk *fmtp = PL_PRIV(fmt);
+ tex_vk->img_fmt = fmtp->vk_fmt->tfmt;
+ tex_vk->num_planes = fmt->num_planes;
+ for (int i = 0; i < tex_vk->num_planes; i++)
+ tex_vk->aspect |= VK_IMAGE_ASPECT_PLANE_0_BIT << i;
+ tex_vk->aspect = PL_DEF(tex_vk->aspect, VK_IMAGE_ASPECT_COLOR_BIT);
+
+ switch (pl_tex_params_dimension(*params)) {
+ case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
+ case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
+ case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
+ }
+
+ if (fmt->emulated) {
+ tex_vk->texel_fmt = pl_find_fmt(gpu, fmt->type, 1, 0,
+ fmt->host_bits[0],
+ PL_FMT_CAP_TEXEL_UNIFORM);
+ if (!tex_vk->texel_fmt) {
+ PL_ERR(gpu, "Failed picking texel format for emulated texture!");
+ goto error;
+ }
+
+ // Our format emulation requires storage image support. In order to
+ // make a bunch of checks happy, just mark it off as storable (and also
+ // enable VK_IMAGE_USAGE_STORAGE_BIT, which we do below)
+ tex->params.storable = true;
+ }
+
+ if (fmtp->blit_emulated) {
+ // Enable what's required for sampling
+ tex->params.sampleable = fmt->caps & PL_FMT_CAP_SAMPLEABLE;
+ tex->params.storable = true;
+ }
+
+ // Blit emulation on planar textures requires storage
+ if ((params->blit_src || params->blit_dst) && tex_vk->num_planes)
+ tex->params.storable = true;
+
+ VkImageUsageFlags usage = 0;
+ VkImageCreateFlags flags = 0;
+ if (tex->params.sampleable)
+ usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ if (tex->params.renderable)
+ usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ if (tex->params.storable)
+ usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+ if (tex->params.host_readable || tex->params.blit_src)
+ usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ if (tex->params.host_writable || tex->params.blit_dst || params->initial_data)
+ usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+ if (!usage) {
+ // Vulkan requires that images have at least *some* usage bit set, but
+ // our API is perfectly happy with a (useless) image. So just set
+ // VK_IMAGE_USAGE_TRANSFER_DST_BIT, since this is harmless.
+ usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+ }
+
+ if (tex_vk->num_planes) {
+ flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
+ }
+
+ // FIXME: Since we can't keep track of queue family ownership properly,
+ // and we don't know in advance what types of queue families this image
+ // will belong to, we're forced to share all of our images between all
+ // command pools.
+ uint32_t qfs[3] = {0};
+ pl_assert(vk->pools.num <= PL_ARRAY_SIZE(qfs));
+ for (int i = 0; i < vk->pools.num; i++)
+ qfs[i] = vk->pools.elem[i]->qf;
+
+ VkImageDrmFormatModifierExplicitCreateInfoEXT drm_explicit = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
+ .drmFormatModifier = params->shared_mem.drm_format_mod,
+ .drmFormatModifierPlaneCount = 1,
+ .pPlaneLayouts = &(VkSubresourceLayout) {
+ .rowPitch = PL_DEF(params->shared_mem.stride_w, params->w),
+ .depthPitch = params->d ? PL_DEF(params->shared_mem.stride_h, params->h) : 0,
+ .offset = params->shared_mem.offset,
+ },
+ };
+
+#ifdef VK_EXT_metal_objects
+ VkImportMetalTextureInfoEXT import_metal_tex = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_METAL_TEXTURE_INFO_EXT,
+ .plane = VK_IMAGE_ASPECT_PLANE_0_BIT << params->shared_mem.plane,
+ };
+
+ VkImportMetalIOSurfaceInfoEXT import_iosurface = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_METAL_IO_SURFACE_INFO_EXT,
+ };
+#endif
+
+ VkImageDrmFormatModifierListCreateInfoEXT drm_list = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
+ .drmFormatModifierCount = fmt->num_modifiers,
+ .pDrmFormatModifiers = fmt->modifiers,
+ };
+
+ VkExternalMemoryImageCreateInfoKHR ext_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR,
+ .handleTypes = vk_handle_type,
+ };
+
+ VkImageCreateInfo iinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = vk_handle_type ? &ext_info : NULL,
+ .imageType = tex_vk->type,
+ .format = tex_vk->img_fmt,
+ .extent = (VkExtent3D) {
+ .width = params->w,
+ .height = PL_MAX(1, params->h),
+ .depth = PL_MAX(1, params->d)
+ },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = usage,
+ .flags = flags,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .sharingMode = vk->pools.num > 1 ? VK_SHARING_MODE_CONCURRENT
+ : VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = vk->pools.num,
+ .pQueueFamilyIndices = qfs,
+ };
+
+ struct vk_malloc_params mparams = {
+ .optimal = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ .export_handle = params->export_handle,
+ .import_handle = params->import_handle,
+ .shared_mem = params->shared_mem,
+ .debug_tag = params->debug_tag,
+ };
+
+ if (params->import_handle == PL_HANDLE_DMA_BUF) {
+ vk_link_struct(&iinfo, &drm_explicit);
+ iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
+ mparams.shared_mem.offset = 0x0; // handled via plane offsets
+ }
+
+#ifdef VK_EXT_metal_objects
+ if (params->import_handle == PL_HANDLE_MTL_TEX) {
+ vk_link_struct(&iinfo, &import_metal_tex);
+ import_metal_tex.mtlTexture = params->shared_mem.handle.handle;
+ }
+
+ if (params->import_handle == PL_HANDLE_IOSURFACE) {
+ vk_link_struct(&iinfo, &import_iosurface);
+ import_iosurface.ioSurface = params->shared_mem.handle.handle;
+ }
+#endif
+
+ if (params->export_handle == PL_HANDLE_DMA_BUF) {
+ pl_assert(drm_list.drmFormatModifierCount > 0);
+ vk_link_struct(&iinfo, &drm_list);
+ iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
+ }
+
+ // Double-check physical image format limits and fail if invalid
+ VkPhysicalDeviceImageDrmFormatModifierInfoEXT drm_pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
+ .sharingMode = iinfo.sharingMode,
+ .queueFamilyIndexCount = iinfo.queueFamilyIndexCount,
+ .pQueueFamilyIndices = iinfo.pQueueFamilyIndices,
+ };
+
+ VkPhysicalDeviceExternalImageFormatInfoKHR ext_pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR,
+ .handleType = ext_info.handleTypes,
+ };
+
+ if (handle_type == PL_HANDLE_DMA_BUF) {
+ if (params->import_handle) {
+ // On import, we know exactly which format modifier to test
+ drm_pinfo.drmFormatModifier = drm_explicit.drmFormatModifier;
+ } else {
+ // On export, the choice of format modifier is ambiguous, because
+ // we offer the implementation a whole list to choose from. In
+ // principle, we must check *all* supported drm format modifiers,
+ // but in practice it should hopefully suffice to just check one
+ drm_pinfo.drmFormatModifier = drm_list.pDrmFormatModifiers[0];
+ }
+ vk_link_struct(&ext_pinfo, &drm_pinfo);
+ }
+
+ VkPhysicalDeviceImageFormatInfo2KHR pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
+ .pNext = vk_handle_type ? &ext_pinfo : NULL,
+ .format = iinfo.format,
+ .type = iinfo.imageType,
+ .tiling = iinfo.tiling,
+ .usage = iinfo.usage,
+ .flags = iinfo.flags,
+ };
+
+ VkExternalImageFormatPropertiesKHR ext_props = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
+ };
+
+ VkImageFormatProperties2KHR props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
+ .pNext = vk_handle_type ? &ext_props : NULL,
+ };
+
+ VkResult res;
+ res = vk->GetPhysicalDeviceImageFormatProperties2KHR(vk->physd, &pinfo, &props);
+ if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ PL_DEBUG(gpu, "Texture creation failed: not supported");
+ goto error;
+ } else {
+ PL_VK_ASSERT(res, "Querying image format properties");
+ }
+
+ VkExtent3D max = props.imageFormatProperties.maxExtent;
+ if (params->w > max.width || params->h > max.height || params->d > max.depth)
+ {
+ PL_ERR(gpu, "Requested image size %dx%dx%d exceeds the maximum allowed "
+ "dimensions %dx%dx%d for vulkan image format %x",
+ params->w, params->h, params->d, max.width, max.height, max.depth,
+ (unsigned) iinfo.format);
+ goto error;
+ }
+
+ // Ensure the handle type is supported
+ if (vk_handle_type) {
+ bool ok = vk_external_mem_check(vk, &ext_props.externalMemoryProperties,
+ handle_type, params->import_handle);
+ if (!ok) {
+ PL_ERR(gpu, "Requested handle type is not compatible with the "
+ "specified combination of image parameters. Possibly the "
+ "handle type is unsupported altogether?");
+ goto error;
+ }
+ }
+
+ VK(vk->CreateImage(vk->dev, &iinfo, PL_VK_ALLOC, &tex_vk->img));
+ tex_vk->usage_flags = iinfo.usage;
+
+ VkMemoryDedicatedRequirements ded_reqs = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR,
+ };
+
+ VkMemoryRequirements2 reqs = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR,
+ .pNext = &ded_reqs,
+ };
+
+ VkImageMemoryRequirementsInfo2 req_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR,
+ .image = tex_vk->img,
+ };
+
+ vk->GetImageMemoryRequirements2(vk->dev, &req_info, &reqs);
+ mparams.reqs = reqs.memoryRequirements;
+ if (ded_reqs.prefersDedicatedAllocation) {
+ mparams.ded_image = tex_vk->img;
+ if (vk_mem_handle_type(params->import_handle))
+ mparams.shared_mem.size = reqs.memoryRequirements.size;
+ }
+
+ const char *debug_tag = params->debug_tag ? params->debug_tag :
+ params->import_handle ? "imported" : "created";
+
+ if (!params->import_handle || vk_mem_handle_type(params->import_handle)) {
+ struct vk_memslice *mem = &tex_vk->mem;
+ if (!vk_malloc_slice(vk->ma, mem, &mparams))
+ goto error;
+
+ VK(vk->BindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
+ }
+
+ static const char * const plane_names[4] = {
+ "plane 0", "plane 1", "plane 2", "plane 3",
+ };
+
+ if (tex_vk->num_planes) {
+ for (int i = 0; i < tex_vk->num_planes; i++) {
+ struct pl_tex_t *plane;
+
+ pl_assert(tex_vk->type == VK_IMAGE_TYPE_2D);
+ plane = (struct pl_tex_t *) pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
+ .image = tex_vk->img,
+ .aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << i,
+ .width = PL_RSHIFT_UP(tex->params.w, fmt->planes[i].shift_x),
+ .height = PL_RSHIFT_UP(tex->params.h, fmt->planes[i].shift_y),
+ .format = fmtp->vk_fmt->pfmt[i].fmt,
+ .usage = usage,
+ .user_data = params->user_data,
+ .debug_tag = PL_DEF(params->debug_tag, plane_names[i]),
+ ));
+ if (!plane)
+ goto error;
+ plane->parent = tex;
+ tex->planes[i] = plane;
+ tex_vk->planes[i] = PL_PRIV(plane);
+ tex_vk->planes[i]->held = false;
+ tex_vk->planes[i]->layout = tex_vk->layout;
+ }
+
+ // Explicitly mask out all usage flags from planar parent images
+ pl_assert(!fmt->caps);
+ tex->params.sampleable = false;
+ tex->params.renderable = false;
+ tex->params.storable = false;
+ tex->params.blit_src = false;
+ tex->params.blit_dst = false;
+ tex->params.host_writable = false;
+ tex->params.host_readable = false;
+ }
+
+ if (!vk_init_image(gpu, tex, debug_tag))
+ goto error;
+
+ if (params->export_handle)
+ tex->shared_mem = tex_vk->mem.shared_mem;
+
+ if (params->export_handle == PL_HANDLE_DMA_BUF) {
+ if (vk->GetImageDrmFormatModifierPropertiesEXT) {
+
+ // Query the DRM format modifier and plane layout from the driver
+ VkImageDrmFormatModifierPropertiesEXT mod_props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
+ };
+
+ VK(vk->GetImageDrmFormatModifierPropertiesEXT(vk->dev, tex_vk->img, &mod_props));
+ tex->shared_mem.drm_format_mod = mod_props.drmFormatModifier;
+
+ VkSubresourceLayout layout = {0};
+ VkImageSubresource plane = {
+ .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
+ };
+
+ vk->GetImageSubresourceLayout(vk->dev, tex_vk->img, &plane, &layout);
+ if (layout.offset != 0) {
+ PL_ERR(gpu, "Exported DRM plane 0 has nonzero offset %zu, "
+ "this should never happen! Erroring for safety...",
+ (size_t) layout.offset);
+ goto error;
+ }
+ tex->shared_mem.stride_w = layout.rowPitch;
+ tex->shared_mem.stride_h = layout.depthPitch;
+
+ } else {
+
+ // Fallback for no modifiers, just do something stupid.
+ tex->shared_mem.drm_format_mod = DRM_FORMAT_MOD_INVALID;
+ tex->shared_mem.stride_w = params->w;
+ tex->shared_mem.stride_h = params->h;
+
+ }
+ }
+
+ if (params->initial_data) {
+ struct pl_tex_transfer_params ul_params = {
+ .tex = tex,
+ .ptr = (void *) params->initial_data,
+ .rc = { 0, 0, 0, params->w, params->h, params->d },
+ };
+
+ // Since we re-use GPU helpers which require writable images, just fake it
+ bool writable = tex->params.host_writable;
+ tex->params.host_writable = true;
+ if (!pl_tex_upload(gpu, &ul_params))
+ goto error;
+ tex->params.host_writable = writable;
+ }
+
+ return tex;
+
+error:
+ vk_tex_destroy(gpu, tex);
+ return NULL;
+}
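+
+// Illustrative sketch of the public entry point (names hypothetical): a
+// host-writable, sampleable texture created through pl_tex_create ends up
+// in vk_tex_create above:
+//
+//     pl_fmt fmt = pl_find_named_fmt(gpu, "rgba8");
+//     pl_tex tex = pl_tex_create(gpu, pl_tex_params(
+//         .w = 1920,
+//         .h = 1080,
+//         .format = fmt,
+//         .sampleable = true,
+//         .host_writable = true,
+//     ));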
+
+void vk_tex_invalidate(pl_gpu gpu, pl_tex tex)
+{
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ tex_vk->may_invalidate = true;
+ for (int i = 0; i < tex_vk->num_planes; i++)
+ tex_vk->planes[i]->may_invalidate = true;
+}
+
+static bool tex_clear_fallback(pl_gpu gpu, pl_tex tex,
+ const union pl_clear_color color)
+{
+ pl_tex pixel = pl_tex_create(gpu, pl_tex_params(
+ .w = 1,
+ .h = 1,
+ .format = tex->params.format,
+ .storable = true,
+ .blit_src = true,
+ .blit_dst = true,
+ ));
+ if (!pixel)
+ return false;
+
+ pl_tex_clear_ex(gpu, pixel, color);
+
+ pl_assert(tex->params.storable);
+ pl_tex_blit(gpu, pl_tex_blit_params(
+ .src = pixel,
+ .dst = tex,
+ .sample_mode = PL_TEX_SAMPLE_NEAREST,
+ ));
+
+ pl_tex_destroy(gpu, &pixel);
+ return true;
+}
+
+void vk_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+
+ if (tex_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT) {
+ if (!tex_clear_fallback(gpu, tex, color)) {
+ PL_ERR(gpu, "Failed clearing imported planar image: color aspect "
+ "clears disallowed by spec and no shader fallback "
+ "available");
+ }
+ return;
+ }
+
+ struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS);
+ if (!cmd)
+ return;
+
+ vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_CLEAR_BIT,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ pl_static_assert(sizeof(VkClearColorValue) == sizeof(union pl_clear_color));
+ const VkClearColorValue *clearColor = (const VkClearColorValue *) &color;
+
+ pl_assert(tex_vk->aspect == VK_IMAGE_ASPECT_COLOR_BIT);
+ static const VkImageSubresourceRange range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+ };
+
+ vk->CmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->layout,
+ clearColor, 1, &range);
+
+ CMD_FINISH(&cmd);
+}
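+
+// For example (illustrative), clearing a float-format texture to opaque red:
+//
+//     pl_tex_clear_ex(gpu, tex, (union pl_clear_color) {
+//         .f = { 1.0, 0.0, 0.0, 1.0 },
+//     });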
+
+void vk_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+ struct pl_tex_vk *src_vk = PL_PRIV(params->src);
+ struct pl_tex_vk *dst_vk = PL_PRIV(params->dst);
+ struct pl_fmt_vk *src_fmtp = PL_PRIV(params->src->params.format);
+ struct pl_fmt_vk *dst_fmtp = PL_PRIV(params->dst->params.format);
+ bool blit_emulated = src_fmtp->blit_emulated || dst_fmtp->blit_emulated;
+ bool planar_fallback = src_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT ||
+ dst_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT;
+
+ pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc;
+ bool requires_scaling = !pl_rect3d_eq(src_rc, dst_rc);
+ if ((requires_scaling && blit_emulated) || planar_fallback) {
+ if (!pl_tex_blit_compute(gpu, params))
+ PL_ERR(gpu, "Failed emulating texture blit, incompatible textures?");
+ return;
+ }
+
+ struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS);
+ if (!cmd)
+ return;
+
+ // When the blit operation doesn't require scaling, we can use the more
+ // efficient vkCmdCopyImage instead of vkCmdBlitImage
+ if (!requires_scaling) {
+ vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_READ_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ pl_rect3d_normalize(&src_rc);
+ pl_rect3d_normalize(&dst_rc);
+
+ VkImageCopy region = {
+ .srcSubresource = {
+ .aspectMask = src_vk->aspect,
+ .layerCount = 1,
+ },
+ .dstSubresource = {
+ .aspectMask = dst_vk->aspect,
+ .layerCount = 1,
+ },
+ .srcOffset = {src_rc.x0, src_rc.y0, src_rc.z0},
+ .dstOffset = {dst_rc.x0, dst_rc.y0, dst_rc.z0},
+ .extent = {
+ pl_rect_w(src_rc),
+ pl_rect_h(src_rc),
+ pl_rect_d(src_rc),
+ },
+ };
+
+ vk->CmdCopyImage(cmd->buf, src_vk->img, src_vk->layout,
+ dst_vk->img, dst_vk->layout, 1, &region);
+ } else {
+ vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_BLIT_BIT,
+ VK_ACCESS_2_TRANSFER_READ_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_BLIT_BIT,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ VkImageBlit region = {
+ .srcSubresource = {
+ .aspectMask = src_vk->aspect,
+ .layerCount = 1,
+ },
+ .dstSubresource = {
+ .aspectMask = dst_vk->aspect,
+ .layerCount = 1,
+ },
+ .srcOffsets = {{src_rc.x0, src_rc.y0, src_rc.z0},
+ {src_rc.x1, src_rc.y1, src_rc.z1}},
+ .dstOffsets = {{dst_rc.x0, dst_rc.y0, dst_rc.z0},
+ {dst_rc.x1, dst_rc.y1, dst_rc.z1}},
+ };
+
+ static const VkFilter filters[PL_TEX_SAMPLE_MODE_COUNT] = {
+ [PL_TEX_SAMPLE_NEAREST] = VK_FILTER_NEAREST,
+ [PL_TEX_SAMPLE_LINEAR] = VK_FILTER_LINEAR,
+ };
+
+ vk->CmdBlitImage(cmd->buf, src_vk->img, src_vk->layout,
+ dst_vk->img, dst_vk->layout, 1, &region,
+ filters[params->sample_mode]);
+ }
+
+ CMD_FINISH(&cmd);
+}
+
+// Determine the best queue type to perform a buffer<->image copy on
+static enum queue_type vk_img_copy_queue(pl_gpu gpu, pl_tex tex,
+ const struct VkBufferImageCopy *region)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+
+ const struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ enum queue_type queue = tex_vk->transfer_queue;
+ if (queue != TRANSFER)
+ return queue;
+
+ VkExtent3D alignment = vk->pool_transfer->props.minImageTransferGranularity;
+
+ enum queue_type fallback = GRAPHICS;
+ if (gpu->limits.compute_queues > gpu->limits.fragment_queues)
+ fallback = COMPUTE; // prefer async compute queue
+
+ int tex_w = PL_DEF(tex->params.w, 1),
+ tex_h = PL_DEF(tex->params.h, 1),
+ tex_d = PL_DEF(tex->params.d, 1);
+
+ bool full_w = region->imageOffset.x + region->imageExtent.width == tex_w,
+ full_h = region->imageOffset.y + region->imageExtent.height == tex_h,
+ full_d = region->imageOffset.z + region->imageExtent.depth == tex_d;
+
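+ // Worked example (hypothetical numbers): with a granularity of {16,16,1}
+ // on a 1920x1080 image, a 960x544 copy at offset (0,0,0) is aligned and
+ // may stay on the transfer queue, whereas a 960x540 copy is not: 540 is
+ // neither a multiple of 16 nor does it reach the image edge (full_h), so
+ // it takes the fallback queue instead.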
+ if (alignment.width) {
+
+ bool unaligned = false;
+ unaligned |= region->imageOffset.x % alignment.width;
+ unaligned |= region->imageOffset.y % alignment.height;
+ unaligned |= region->imageOffset.z % alignment.depth;
+ unaligned |= (region->imageExtent.width % alignment.width) && !full_w;
+ unaligned |= (region->imageExtent.height % alignment.height) && !full_h;
+ unaligned |= (region->imageExtent.depth % alignment.depth) && !full_d;
+
+ return unaligned ? fallback : queue;
+
+ } else {
+
+ // an alignment of {0} means the copy must span the entire image
+ bool unaligned = false;
+ unaligned |= region->imageOffset.x || !full_w;
+ unaligned |= region->imageOffset.y || !full_h;
+ unaligned |= region->imageOffset.z || !full_d;
+
+ return unaligned ? fallback : queue;
+
+ }
+}
+
+static void tex_xfer_cb(void *ctx, void *arg)
+{
+ void (*fun)(void *priv) = ctx;
+ fun(arg);
+}
+
+bool vk_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+ pl_tex tex = params->tex;
+ pl_fmt fmt = tex->params.format;
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ struct pl_tex_transfer_params *slices = NULL;
+ int num_slices = 0;
+
+ if (!params->buf)
+ return pl_tex_upload_pbo(gpu, params);
+
+ pl_buf buf = params->buf;
+ struct pl_buf_vk *buf_vk = PL_PRIV(buf);
+ pl_rect3d rc = params->rc;
+ const size_t size = pl_tex_transfer_size(params);
+ const size_t buf_offset = buf_vk->mem.offset + params->buf_offset;
+ bool unaligned = buf_offset % fmt->texel_size;
+ if (unaligned)
+ PL_TRACE(gpu, "vk_tex_upload: unaligned transfer (slow path)");
+
+ if (fmt->emulated || unaligned) {
+
+ // Create all slice buffers first, to fail early if we run out of
+ // memory, and to avoid blocking unnecessarily while waiting for these
+ // buffers to be read from
+ num_slices = pl_tex_transfer_slices(gpu, tex_vk->texel_fmt, params, &slices);
+ for (int i = 0; i < num_slices; i++) {
+ slices[i].buf = pl_buf_create(gpu, pl_buf_params(
+ .memory_type = PL_BUF_MEM_DEVICE,
+ .format = tex_vk->texel_fmt,
+ .size = pl_tex_transfer_size(&slices[i]),
+ .storable = fmt->emulated,
+ ));
+
+ if (!slices[i].buf) {
+ PL_ERR(gpu, "Failed creating buffer for tex upload fallback!");
+ num_slices = i; // only clean up buffers up to here
+ goto error;
+ }
+ }
+
+ // All temporary buffers successfully created, begin copying source data
+ struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue,
+ params->timer);
+ if (!cmd)
+ goto error;
+
+ vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size,
+ false);
+
+ for (int i = 0; i < num_slices; i++) {
+ pl_buf slice = slices[i].buf;
+ struct pl_buf_vk *slice_vk = PL_PRIV(slice);
+ vk_buf_barrier(gpu, cmd, slice, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT, 0, slice->params.size,
+ false);
+
+ vk->CmdCopyBuffer(cmd->buf, buf_vk->mem.buf, slice_vk->mem.buf, 1, &(VkBufferCopy) {
+ .srcOffset = buf_vk->mem.offset + slices[i].buf_offset,
+ .dstOffset = slice_vk->mem.offset,
+ .size = slice->params.size,
+ });
+ }
+
+ if (params->callback)
+ vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);
+
+ bool ok = CMD_FINISH(&cmd);
+
+ // Finally, dispatch the (texel) upload asynchronously. We can fire the
+ // callback as soon as the previous command completes, because these
+ // temporary buffers already hold persistent copies of the data
+ for (int i = 0; i < num_slices; i++) {
+ if (ok) {
+ slices[i].buf_offset = 0;
+ ok = fmt->emulated ? pl_tex_upload_texel(gpu, &slices[i])
+ : pl_tex_upload(gpu, &slices[i]);
+ }
+ pl_buf_destroy(gpu, &slices[i].buf);
+ }
+
+ pl_free(slices);
+ return ok;
+
+ } else {
+
+ pl_assert(fmt->texel_align == fmt->texel_size);
+ const VkBufferImageCopy region = {
+ .bufferOffset = buf_offset,
+ .bufferRowLength = params->row_pitch / fmt->texel_size,
+ .bufferImageHeight = params->depth_pitch / params->row_pitch,
+ .imageOffset = { rc.x0, rc.y0, rc.z0 },
+ .imageExtent = { pl_rect_w(rc), pl_rect_h(rc), pl_rect_d(rc) },
+ .imageSubresource = {
+ .aspectMask = tex_vk->aspect,
+ .layerCount = 1,
+ },
+ };
+
+ enum queue_type queue = vk_img_copy_queue(gpu, tex, &region);
+ struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer);
+ if (!cmd)
+ goto error;
+
+ vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size,
+ false);
+ vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+ vk->CmdCopyBufferToImage(cmd->buf, buf_vk->mem.buf, tex_vk->img,
+ tex_vk->layout, 1, &region);
+
+ if (params->callback)
+ vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);
+
+ return CMD_FINISH(&cmd);
+ }
+
+ pl_unreachable();
+
+error:
+ for (int i = 0; i < num_slices; i++)
+ pl_buf_destroy(gpu, &slices[i].buf);
+ pl_free(slices);
+ return false;
+}
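+
+// Illustrative sketch (names hypothetical): a whole-texture upload from
+// tightly packed host memory. Since no buffer is given, this is staged
+// through an internal buffer (pl_tex_upload_pbo) before reaching the
+// buffer path above:
+//
+//     pl_tex_upload(gpu, pl_tex_transfer_params(
+//         .tex = tex,
+//         .ptr = pixels,
+//     ));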
+
+bool vk_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+ pl_tex tex = params->tex;
+ pl_fmt fmt = tex->params.format;
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ struct pl_tex_transfer_params *slices = NULL;
+ int num_slices = 0;
+
+ if (!params->buf)
+ return pl_tex_download_pbo(gpu, params);
+
+ pl_buf buf = params->buf;
+ struct pl_buf_vk *buf_vk = PL_PRIV(buf);
+ pl_rect3d rc = params->rc;
+ const size_t size = pl_tex_transfer_size(params);
+ const size_t buf_offset = buf_vk->mem.offset + params->buf_offset;
+ bool unaligned = buf_offset % fmt->texel_size;
+ if (unaligned)
+ PL_TRACE(gpu, "vk_tex_download: unaligned transfer (slow path)");
+
+ if (fmt->emulated || unaligned) {
+
+ num_slices = pl_tex_transfer_slices(gpu, tex_vk->texel_fmt, params, &slices);
+ for (int i = 0; i < num_slices; i++) {
+ slices[i].buf = pl_buf_create(gpu, pl_buf_params(
+ .memory_type = PL_BUF_MEM_DEVICE,
+ .format = tex_vk->texel_fmt,
+ .size = pl_tex_transfer_size(&slices[i]),
+ .storable = fmt->emulated,
+ ));
+
+ if (!slices[i].buf) {
+ PL_ERR(gpu, "Failed creating buffer for tex download fallback!");
+ num_slices = i;
+ goto error;
+ }
+ }
+
+ for (int i = 0; i < num_slices; i++) {
+ // Restore buffer offset after downloading into temporary buffer,
+ // because we still need to copy the data from the temporary buffer
+ // into this offset in the original buffer
+ const size_t tmp_offset = slices[i].buf_offset;
+ slices[i].buf_offset = 0;
+ bool ok = fmt->emulated ? pl_tex_download_texel(gpu, &slices[i])
+ : pl_tex_download(gpu, &slices[i]);
+ slices[i].buf_offset = tmp_offset;
+ if (!ok)
+ goto error;
+ }
+
+ // Finally, download into the user buffer
+ struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue, params->timer);
+ if (!cmd)
+ goto error;
+
+ vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size,
+ false);
+
+ for (int i = 0; i < num_slices; i++) {
+ pl_buf slice = slices[i].buf;
+ struct pl_buf_vk *slice_vk = PL_PRIV(slice);
+ vk_buf_barrier(gpu, cmd, slice, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_READ_BIT, 0, slice->params.size,
+ false);
+
+ vk->CmdCopyBuffer(cmd->buf, slice_vk->mem.buf, buf_vk->mem.buf, 1, &(VkBufferCopy) {
+ .srcOffset = slice_vk->mem.offset,
+ .dstOffset = buf_vk->mem.offset + slices[i].buf_offset,
+ .size = slice->params.size,
+ });
+
+ pl_buf_destroy(gpu, &slices[i].buf);
+ }
+
+ vk_buf_flush(gpu, cmd, buf, params->buf_offset, size);
+
+ if (params->callback)
+ vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);
+
+ pl_free(slices);
+ return CMD_FINISH(&cmd);
+
+ } else {
+
+ pl_assert(params->row_pitch % fmt->texel_size == 0);
+ pl_assert(params->depth_pitch % params->row_pitch == 0);
+ const VkBufferImageCopy region = {
+ .bufferOffset = buf_offset,
+ .bufferRowLength = params->row_pitch / fmt->texel_size,
+ .bufferImageHeight = params->depth_pitch / params->row_pitch,
+ .imageOffset = { rc.x0, rc.y0, rc.z0 },
+ .imageExtent = { pl_rect_w(rc), pl_rect_h(rc), pl_rect_d(rc) },
+ .imageSubresource = {
+ .aspectMask = tex_vk->aspect,
+ .layerCount = 1,
+ },
+ };
+
+ enum queue_type queue = vk_img_copy_queue(gpu, tex, &region);
+
+ struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer);
+ if (!cmd)
+ goto error;
+
+ vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size,
+ false);
+ vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT,
+ VK_ACCESS_2_TRANSFER_READ_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+ vk->CmdCopyImageToBuffer(cmd->buf, tex_vk->img, tex_vk->layout,
+ buf_vk->mem.buf, 1, &region);
+ vk_buf_flush(gpu, cmd, buf, params->buf_offset, size);
+
+ if (params->callback)
+ vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv);
+
+ return CMD_FINISH(&cmd);
+ }
+
+ pl_unreachable();
+
+error:
+ for (int i = 0; i < num_slices; i++)
+ pl_buf_destroy(gpu, &slices[i].buf);
+ pl_free(slices);
+ return false;
+}
+
+bool vk_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout)
+{
+ struct pl_vk *p = PL_PRIV(gpu);
+ struct vk_ctx *vk = p->vk;
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+
+ // Opportunistically check if we can re-use this texture without flushing
+ vk_poll_commands(vk, 0);
+ if (pl_rc_count(&tex_vk->rc) == 1)
+ goto skip_blocking;
+
+ // Otherwise, we're forced to submit any queued commands so that the user
+ // is guaranteed to see progress eventually, even if they call this in a loop
+ CMD_SUBMIT(NULL);
+ vk_poll_commands(vk, timeout);
+ if (pl_rc_count(&tex_vk->rc) > 1)
+ return true;
+
+ // fall through
+skip_blocking:
+ for (int i = 0; i < tex_vk->num_planes; i++) {
+ if (vk_tex_poll(gpu, tex->planes[i], timeout))
+ return true;
+ }
+
+ return false;
+}
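+
+// Typical usage (illustrative): block until the GPU is done with a texture
+// before reusing or destroying its backing storage:
+//
+//     while (pl_tex_poll(gpu, tex, UINT64_MAX))
+//         ; // still in use, keep waiting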
+
+bool vk_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync)
+{
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ struct pl_sync_vk *sync_vk = PL_PRIV(sync);
+
+ if (tex_vk->num_planes) {
+ PL_ERR(gpu, "`pl_tex_export` cannot be called on planar textures. "
+ "Please see `pl_vulkan_hold_ex` for a replacement.");
+ return false;
+ }
+
+ struct vk_cmd *cmd = CMD_BEGIN(ANY);
+ if (!cmd)
+ goto error;
+
+ vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_NONE,
+ 0, VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_EXTERNAL);
+
+ // Make the next barrier appear as though coming from a different queue
+ tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL;
+
+ vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, (pl_vulkan_sem){ sync_vk->wait });
+ if (!CMD_SUBMIT(&cmd))
+ goto error;
+
+ // Remember the other dependency and hold on to the sync object
+ PL_ARRAY_APPEND(tex, tex_vk->ext_deps, (pl_vulkan_sem){ sync_vk->signal });
+ pl_rc_ref(&sync_vk->rc);
+ tex_vk->ext_sync = sync;
+ tex_vk->qf = VK_QUEUE_FAMILY_EXTERNAL;
+ return true;
+
+error:
+ PL_ERR(gpu, "Failed exporting shared texture!");
+ return false;
+}
+
+pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params)
+{
+ pl_fmt fmt = NULL;
+ for (int i = 0; i < gpu->num_formats; i++) {
+ const struct vk_format **vkfmt = PL_PRIV(gpu->formats[i]);
+ if ((*vkfmt)->tfmt == params->format) {
+ fmt = gpu->formats[i];
+ break;
+ }
+ }
+
+ if (!fmt) {
+ PL_ERR(gpu, "Could not find pl_fmt suitable for wrapped image "
+ "with format %s", vk_fmt_name(params->format));
+ return NULL;
+ }
+
+ VkImageUsageFlags usage = params->usage;
+ if (fmt->num_planes)
+ usage = 0; // mask capabilities from the base texture
+
+ struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk);
+ tex->params = (struct pl_tex_params) {
+ .format = fmt,
+ .w = params->width,
+ .h = params->height,
+ .d = params->depth,
+ .sampleable = !!(usage & VK_IMAGE_USAGE_SAMPLED_BIT),
+ .renderable = !!(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
+ .storable = !!(usage & VK_IMAGE_USAGE_STORAGE_BIT),
+ .blit_src = !!(usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
+ .blit_dst = !!(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
+ .host_writable = !!(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
+ .host_readable = !!(usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
+ .user_data = params->user_data,
+ .debug_tag = params->debug_tag,
+ };
+
+ // Mask out capabilities not permitted by the `pl_fmt`
+#define MASK(field, cap) \
+ do { \
+ if (tex->params.field && !(fmt->caps & cap)) { \
+ PL_WARN(gpu, "Masking `" #field "` from wrapped texture because " \
+ "the corresponding format '%s' does not support " #cap, \
+ fmt->name); \
+ tex->params.field = false; \
+ } \
+ } while (0)
+
+ MASK(sampleable, PL_FMT_CAP_SAMPLEABLE);
+ MASK(renderable, PL_FMT_CAP_RENDERABLE);
+ MASK(storable, PL_FMT_CAP_STORABLE);
+ MASK(blit_src, PL_FMT_CAP_BLITTABLE);
+ MASK(blit_dst, PL_FMT_CAP_BLITTABLE);
+ MASK(host_readable, PL_FMT_CAP_HOST_READABLE);
+#undef MASK
+
+ // For simplicity, explicitly mask out blit emulation for wrapped textures
+ struct pl_fmt_vk *fmtp = PL_PRIV(fmt);
+ if (fmtp->blit_emulated) {
+ tex->params.blit_src = false;
+ tex->params.blit_dst = false;
+ }
+
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+ switch (pl_tex_params_dimension(tex->params)) {
+ case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
+ case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
+ case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
+ }
+ tex_vk->external_img = true;
+ tex_vk->held = !fmt->num_planes;
+ tex_vk->img = params->image;
+ tex_vk->img_fmt = params->format;
+ tex_vk->num_planes = fmt->num_planes;
+ tex_vk->usage_flags = usage;
+ tex_vk->aspect = params->aspect;
+
+ if (!tex_vk->aspect) {
+ for (int i = 0; i < tex_vk->num_planes; i++)
+ tex_vk->aspect |= VK_IMAGE_ASPECT_PLANE_0_BIT << i;
+ tex_vk->aspect = PL_DEF(tex_vk->aspect, VK_IMAGE_ASPECT_COLOR_BIT);
+ }
+
+ // Blitting to planar images requires fallback via compute shaders
+ if (tex_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT) {
+ tex->params.blit_src &= tex->params.storable;
+ tex->params.blit_dst &= tex->params.storable;
+ }
+
+ static const char * const wrapped_plane_names[4] = {
+ "wrapped plane 0", "wrapped plane 1", "wrapped plane 2", "wrapped plane 3",
+ };
+
+ for (int i = 0; i < tex_vk->num_planes; i++) {
+ struct pl_tex_t *plane;
+ VkImageAspectFlags aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << i;
+ if (!(aspect & tex_vk->aspect)) {
+ PL_INFO(gpu, "Not wrapping plane %d due to aspect bit 0x%x not "
+ "being contained in supplied params->aspect 0x%x!",
+ i, (unsigned) aspect, (unsigned) tex_vk->aspect);
+ continue;
+ }
+
+ pl_assert(tex_vk->type == VK_IMAGE_TYPE_2D);
+ plane = (struct pl_tex_t *) pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
+ .image = tex_vk->img,
+ .aspect = aspect,
+ .width = PL_RSHIFT_UP(tex->params.w, fmt->planes[i].shift_x),
+ .height = PL_RSHIFT_UP(tex->params.h, fmt->planes[i].shift_y),
+ .format = fmtp->vk_fmt->pfmt[i].fmt,
+ .usage = params->usage,
+ .user_data = params->user_data,
+ .debug_tag = PL_DEF(params->debug_tag, wrapped_plane_names[i]),
+ ));
+ if (!plane)
+ goto error;
+ plane->parent = tex;
+ tex->planes[i] = plane;
+ tex_vk->planes[i] = PL_PRIV(plane);
+ }
+
+ if (!vk_init_image(gpu, tex, PL_DEF(params->debug_tag, "wrapped")))
+ goto error;
+
+ return tex;
+
+error:
+ vk_tex_destroy(gpu, tex);
+ return NULL;
+}
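+
+// Illustrative sketch (handles hypothetical): wrapping an externally created
+// VkImage as a pl_tex. The caller retains ownership of the image and must
+// bracket external use with pl_vulkan_hold_ex / pl_vulkan_release_ex:
+//
+//     pl_tex tex = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
+//         .image  = vkimg,
+//         .width  = 1920,
+//         .height = 1080,
+//         .format = VK_FORMAT_R8G8B8A8_UNORM,
+//         .usage  = VK_IMAGE_USAGE_SAMPLED_BIT |
+//                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+//     ));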
+
+VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, VkFormat *out_format,
+ VkImageUsageFlags *out_flags)
+{
+ struct pl_tex_vk *tex_vk = PL_PRIV(tex);
+
+ if (out_format)
+ *out_format = tex_vk->img_fmt;
+ if (out_flags)
+ *out_flags = tex_vk->usage_flags;
+
+ return tex_vk->img;
+}
+
+bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params)
+{
+ struct pl_tex_vk *tex_vk = PL_PRIV(params->tex);
+ pl_assert(params->semaphore.sem);
+
+ bool held = tex_vk->held;
+ for (int i = 0; i < tex_vk->num_planes; i++)
+ held |= tex_vk->planes[i]->held;
+
+ if (held) {
+ PL_ERR(gpu, "Attempting to hold an already held image!");
+ return false;
+ }
+
+ struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS);
+ if (!cmd) {
+ PL_ERR(gpu, "Failed holding external image!");
+ return false;
+ }
+
+ VkImageLayout layout = params->layout;
+ if (params->out_layout) {
+ // For planar images, arbitrarily pick the current image layout of the
+ // first plane. This should be fine in practice, since all planes will
+ // share the same usage capabilities.
+ if (tex_vk->num_planes) {
+ layout = tex_vk->planes[0]->layout;
+ } else {
+ layout = tex_vk->layout;
+ }
+ }
+
+ bool may_invalidate = true;
+ if (!tex_vk->num_planes) {
+ may_invalidate &= tex_vk->may_invalidate;
+ vk_tex_barrier(gpu, cmd, params->tex, VK_PIPELINE_STAGE_2_NONE,
+ 0, layout, params->qf);
+ }
+
+ for (int i = 0; i < tex_vk->num_planes; i++) {
+ may_invalidate &= tex_vk->planes[i]->may_invalidate;
+ vk_tex_barrier(gpu, cmd, params->tex->planes[i],
+ VK_PIPELINE_STAGE_2_NONE, 0, layout, params->qf);
+ }
+
+ vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, params->semaphore);
+ bool ok = CMD_SUBMIT(&cmd);
+
+ if (!tex_vk->num_planes) {
+ tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL;
+ tex_vk->held = ok;
+ }
+
+ for (int i = 0; i < tex_vk->num_planes; i++) {
+ struct pl_tex_vk *plane_vk = tex_vk->planes[i];
+ plane_vk->sem.write.queue = plane_vk->sem.read.queue = NULL;
+ plane_vk->held = ok;
+ }
+
+ if (ok && params->out_layout)
+ *params->out_layout = may_invalidate ? VK_IMAGE_LAYOUT_UNDEFINED : layout;
+
+ return ok;
+}
+
+void pl_vulkan_release_ex(pl_gpu gpu, const struct pl_vulkan_release_params *params)
+{
+ struct pl_tex_vk *tex_vk = PL_PRIV(params->tex);
+ if (tex_vk->num_planes) {
+ struct pl_vulkan_release_params plane_pars = *params;
+ for (int i = 0; i < tex_vk->num_planes; i++) {
+ plane_pars.tex = params->tex->planes[i];
+ pl_vulkan_release_ex(gpu, &plane_pars);
+ }
+ return;
+ }
+
+ if (!tex_vk->held) {
+ PL_ERR(gpu, "Attempting to release an unheld image?");
+ return;
+ }
+
+ if (params->semaphore.sem)
+ PL_ARRAY_APPEND(params->tex, tex_vk->ext_deps, params->semaphore);
+
+ tex_vk->qf = params->qf;
+ tex_vk->layout = params->layout;
+ tex_vk->held = false;
+}
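+
+// Illustrative interop pattern (semaphore and values hypothetical): hold the
+// image for external use, synchronize through the given semaphore, then
+// release it back to libplacebo:
+//
+//     pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
+//         .tex       = tex,
+//         .layout    = VK_IMAGE_LAYOUT_GENERAL,
+//         .qf        = VK_QUEUE_FAMILY_EXTERNAL,
+//         .semaphore = (pl_vulkan_sem) { .sem = sem, .value = 1 },
+//     ));
+//
+//     // ... external code waits on `sem`, uses the image, signals it ...
+//
+//     pl_vulkan_release_ex(gpu, pl_vulkan_release_params(
+//         .tex       = tex,
+//         .layout    = VK_IMAGE_LAYOUT_GENERAL,
+//         .qf        = VK_QUEUE_FAMILY_EXTERNAL,
+//         .semaphore = (pl_vulkan_sem) { .sem = sem, .value = 2 },
+//     ));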
+
+bool pl_vulkan_hold(pl_gpu gpu, pl_tex tex, VkImageLayout layout,
+ pl_vulkan_sem sem_out)
+{
+ return pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
+ .tex = tex,
+ .layout = layout,
+ .semaphore = sem_out,
+ .qf = VK_QUEUE_FAMILY_IGNORED,
+ ));
+}
+
+bool pl_vulkan_hold_raw(pl_gpu gpu, pl_tex tex,
+ VkImageLayout *out_layout,
+ pl_vulkan_sem sem_out)
+{
+ return pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
+ .tex = tex,
+ .out_layout = out_layout,
+ .semaphore = sem_out,
+ .qf = VK_QUEUE_FAMILY_IGNORED,
+ ));
+}
+
+void pl_vulkan_release(pl_gpu gpu, pl_tex tex, VkImageLayout layout,
+ pl_vulkan_sem sem_in)
+{
+ pl_vulkan_release_ex(gpu, pl_vulkan_release_params(
+ .tex = tex,
+ .layout = layout,
+ .semaphore = sem_in,
+ .qf = VK_QUEUE_FAMILY_IGNORED,
+ ));
+}