 src/vulkan/swapchain.c | 911 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 911 insertions, 0 deletions
diff --git a/src/vulkan/swapchain.c b/src/vulkan/swapchain.c
new file mode 100644
index 0000000..0741fbf
--- /dev/null
+++ b/src/vulkan/swapchain.c
@@ -0,0 +1,911 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "common.h"
+#include "command.h"
+#include "formats.h"
+#include "utils.h"
+#include "gpu.h"
+#include "swapchain.h"
+#include "pl_thread.h"
+
+struct sem_pair {
+    VkSemaphore in;
+    VkSemaphore out;
+};
+
+struct priv {
+    struct pl_sw_fns impl;
+
+    pl_mutex lock;
+    struct vk_ctx *vk;
+    VkSurfaceKHR surf;
+    PL_ARRAY(VkSurfaceFormatKHR) formats;
+
+    // current swapchain and metadata:
+    struct pl_vulkan_swapchain_params params;
+    VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
+    VkSwapchainKHR swapchain;
+    int cur_width, cur_height;
+    int swapchain_depth;
+    pl_rc_t frames_in_flight;   // number of frames currently queued
+    bool suboptimal;            // true once VK_SUBOPTIMAL_KHR is returned
+    bool needs_recreate;        // swapchain needs to be recreated
+    struct pl_color_repr color_repr;
+    struct pl_color_space color_space;
+    struct pl_hdr_metadata hdr_metadata;
+
+    // state of the images:
+    PL_ARRAY(pl_tex) images;        // pl_tex wrappers for the VkImages
+    PL_ARRAY(struct sem_pair) sems; // pool of semaphores used to synchronize images
+    int idx_sems;                   // index of next free semaphore pair
+    int last_imgidx;                // the image index last acquired (for submit)
+};
+
+static const struct pl_sw_fns vulkan_swapchain;
+
+static bool map_color_space(VkColorSpaceKHR space, struct pl_color_space *out)
+{
+    switch (space) {
+    // Note: This is technically against the spec, but more often than not
+    // it's the correct result, since `SRGB_NONLINEAR` is just a catch-all
+    // for any sort of typical SDR curve, which is better approximated by
+    // `pl_color_space_monitor`.
+    case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR:
+        *out = pl_color_space_monitor;
+        return true;
+
+    case VK_COLOR_SPACE_BT709_NONLINEAR_EXT:
+        *out = pl_color_space_monitor;
+        return true;
+    case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_DISPLAY_P3,
+            .transfer  = PL_COLOR_TRC_BT_1886,
+        };
+        return true;
+    case VK_COLOR_SPACE_DCI_P3_LINEAR_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_DCI_P3,
+            .transfer  = PL_COLOR_TRC_LINEAR,
+        };
+        return true;
+    case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_DCI_P3,
+            .transfer  = PL_COLOR_TRC_BT_1886,
+        };
+        return true;
+    case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT:
+    case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT:
+        // TODO
+        return false;
+    case VK_COLOR_SPACE_BT709_LINEAR_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_BT_709,
+            .transfer  = PL_COLOR_TRC_LINEAR,
+        };
+        return true;
+    case VK_COLOR_SPACE_BT2020_LINEAR_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_BT_2020,
+            .transfer  = PL_COLOR_TRC_LINEAR,
+        };
+        return true;
+    case VK_COLOR_SPACE_HDR10_ST2084_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_BT_2020,
+            .transfer  = PL_COLOR_TRC_PQ,
+        };
+        return true;
+    case VK_COLOR_SPACE_DOLBYVISION_EXT:
+        // Unlikely to ever be implemented
+        return false;
+    case VK_COLOR_SPACE_HDR10_HLG_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_BT_2020,
+            .transfer  = PL_COLOR_TRC_HLG,
+        };
+        return true;
+    case VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_ADOBE,
+            .transfer  = PL_COLOR_TRC_LINEAR,
+        };
+        return true;
+    case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT:
+        *out = (struct pl_color_space) {
+            .primaries = PL_COLOR_PRIM_ADOBE,
+            .transfer  = PL_COLOR_TRC_GAMMA22,
+        };
+        return true;
+    case VK_COLOR_SPACE_PASS_THROUGH_EXT:
+        *out = pl_color_space_unknown;
+        return true;
+
+#ifdef VK_AMD_display_native_hdr
+    case VK_COLOR_SPACE_DISPLAY_NATIVE_AMD:
+        // TODO
+        return false;
+#endif
+
+    default: return false;
+    }
+}
+
+static bool pick_surf_format(pl_swapchain sw, const struct pl_color_space *hint)
+{
+    struct priv *p = PL_PRIV(sw);
+    struct vk_ctx *vk = p->vk;
+    pl_gpu gpu = sw->gpu;
+
+    int best_score = 0, best_id;
+    bool wide_gamut = pl_color_primaries_is_wide_gamut(hint->primaries);
+    bool prefer_hdr = pl_color_transfer_is_hdr(hint->transfer);
+
+    for (int i = 0; i < p->formats.num; i++) {
+        // Color space / format whitelist
+        struct pl_color_space space;
+        if (!map_color_space(p->formats.elem[i].colorSpace, &space))
+            continue;
+
+        bool disable10 = !pl_color_transfer_is_hdr(space.transfer) &&
+                         p->params.disable_10bit_sdr;
+
+        switch (p->formats.elem[i].format) {
+        // Only accept floating point formats for linear curves
+        case VK_FORMAT_R16G16B16_SFLOAT:
+        case VK_FORMAT_R16G16B16A16_SFLOAT:
+        case VK_FORMAT_R32G32B32_SFLOAT:
+        case VK_FORMAT_R32G32B32A32_SFLOAT:
+        case VK_FORMAT_R64G64B64_SFLOAT:
+        case VK_FORMAT_R64G64B64A64_SFLOAT:
+            if (space.transfer == PL_COLOR_TRC_LINEAR)
+                break; // accept
+            continue;
+
+        // Only accept 8 bit for non-HDR curves
+        case VK_FORMAT_R8G8B8_UNORM:
+        case VK_FORMAT_B8G8R8_UNORM:
+        case VK_FORMAT_R8G8B8A8_UNORM:
+        case VK_FORMAT_B8G8R8A8_UNORM:
+        case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+            if (!pl_color_transfer_is_hdr(space.transfer))
+                break; // accept
+            continue;
+
+        // Only accept 10 bit formats for non-linear curves
+        case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+        case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+            if (space.transfer != PL_COLOR_TRC_LINEAR && !disable10)
+                break; // accept
+            continue;
+
+        // Accept 16-bit formats for everything
+        case VK_FORMAT_R16G16B16_UNORM:
+        case VK_FORMAT_R16G16B16A16_UNORM:
+            if (!disable10)
+                break; // accept
+            continue;
+
+        default: continue;
+        }
+
+        // Make sure we can wrap this format to a meaningful, valid pl_fmt
+        for (int n = 0; n < gpu->num_formats; n++) {
+            pl_fmt plfmt = gpu->formats[n];
+            const struct vk_format **pvkfmt = PL_PRIV(plfmt);
+            if ((*pvkfmt)->tfmt != p->formats.elem[i].format)
+                continue;
+
+            enum pl_fmt_caps render_caps = 0;
+            render_caps |= PL_FMT_CAP_RENDERABLE;
+            render_caps |= PL_FMT_CAP_BLITTABLE;
+            if ((plfmt->caps & render_caps) != render_caps)
+                continue;
+
+            // format valid, use it if it has a higher score
+            int score = 0;
+            for (int c = 0; c < 3; c++)
+                score += plfmt->component_depth[c];
+            if (pl_color_primaries_is_wide_gamut(space.primaries) == wide_gamut)
+                score += 1000;
+            if (space.primaries == hint->primaries)
+                score += 2000;
+            if (pl_color_transfer_is_hdr(space.transfer) == prefer_hdr)
+                score += 10000;
+            if (space.transfer == hint->transfer)
+                score += 20000;
+
+            switch (plfmt->type) {
+            case PL_FMT_UNKNOWN: break;
+            case PL_FMT_UINT:    break;
+            case PL_FMT_SINT:    break;
+            case PL_FMT_UNORM:   score += 500; break;
+            case PL_FMT_SNORM:   score += 400; break;
+            case PL_FMT_FLOAT:   score += 300; break;
+            case PL_FMT_TYPE_COUNT: pl_unreachable();
+            }
+
+            if (score > best_score) {
+                best_score = score;
+                best_id = i;
+                break;
+            }
+        }
+    }
+
+    if (!best_score) {
+        PL_ERR(vk, "Failed picking any valid, renderable surface format!");
+        return false;
+    }
+
+    VkSurfaceFormatKHR new_sfmt = p->formats.elem[best_id];
+    if (p->protoInfo.imageFormat != new_sfmt.format ||
+        p->protoInfo.imageColorSpace != new_sfmt.colorSpace)
+    {
+        PL_INFO(vk, "Picked surface configuration %d: %s + %s", best_id,
+                vk_fmt_name(new_sfmt.format),
+                vk_csp_name(new_sfmt.colorSpace));
+
+        p->protoInfo.imageFormat = new_sfmt.format;
+        p->protoInfo.imageColorSpace = new_sfmt.colorSpace;
+        p->needs_recreate = true;
+    }
+
+    return true;
+}
+
+static void set_hdr_metadata(struct priv *p, const struct pl_hdr_metadata *metadata)
+{
+    struct vk_ctx *vk = p->vk;
+    if (!vk->SetHdrMetadataEXT)
+        return;
+
+    // Whitelist only values that we support signalling metadata for
+    struct pl_hdr_metadata fix = {
+        .prim     = metadata->prim,
+        .min_luma = metadata->min_luma,
+        .max_luma = metadata->max_luma,
+        .max_cll  = metadata->max_cll,
+        .max_fall = metadata->max_fall,
+    };
+
+    // Ignore no-op changes
+    if (pl_hdr_metadata_equal(&fix, &p->hdr_metadata))
+        return;
+
+    // Remember the metadata so we can re-apply it after swapchain recreation
+    p->hdr_metadata = fix;
+
+    // Ignore HDR metadata requests for SDR swapchains
+    if (!pl_color_transfer_is_hdr(p->color_space.transfer))
+        return;
+
+    if (!p->swapchain)
+        return;
+
+    vk->SetHdrMetadataEXT(vk->dev, 1, &p->swapchain, &(VkHdrMetadataEXT) {
+        .sType = VK_STRUCTURE_TYPE_HDR_METADATA_EXT,
+        .displayPrimaryRed   = { fix.prim.red.x,   fix.prim.red.y },
+        .displayPrimaryGreen = { fix.prim.green.x, fix.prim.green.y },
+        .displayPrimaryBlue  = { fix.prim.blue.x,  fix.prim.blue.y },
+        .whitePoint = { fix.prim.white.x, fix.prim.white.y },
+        .maxLuminance = fix.max_luma,
+        .minLuminance = fix.min_luma,
+        .maxContentLightLevel = fix.max_cll,
+        .maxFrameAverageLightLevel = fix.max_fall,
+    });
+
+    // Keep track of applied HDR colorimetry metadata
+    p->color_space.hdr = p->hdr_metadata;
+}
+
+pl_swapchain pl_vulkan_create_swapchain(pl_vulkan plvk,
+                                        const struct pl_vulkan_swapchain_params *params)
+{
+    struct vk_ctx *vk = PL_PRIV(plvk);
+    pl_gpu gpu = plvk->gpu;
+
+    if (!vk->CreateSwapchainKHR) {
+        PL_ERR(gpu, VK_KHR_SWAPCHAIN_EXTENSION_NAME " not enabled!");
+        return NULL;
+    }
+
+    struct pl_swapchain_t *sw = pl_zalloc_obj(NULL, sw, struct priv);
+    sw->log = vk->log;
+    sw->gpu = gpu;
+
+    struct priv *p = PL_PRIV(sw);
+    pl_mutex_init(&p->lock);
+    p->impl = vulkan_swapchain;
+    p->params = *params;
+    p->vk = vk;
+    p->surf = params->surface;
+    p->swapchain_depth = PL_DEF(params->swapchain_depth, 3);
+    pl_assert(p->swapchain_depth > 0);
+    atomic_init(&p->frames_in_flight, 0);
+    p->last_imgidx = -1;
+    p->protoInfo = (VkSwapchainCreateInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+        .surface = p->surf,
+        .imageArrayLayers = 1, // non-stereoscopic
+        .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .minImageCount = p->swapchain_depth + 1, // +1 for the FB
+        .presentMode = params->present_mode,
+        .clipped = true,
+    };
+
+    // These fields will be updated by `vk_sw_recreate`
+    p->color_space = pl_color_space_unknown;
+    p->color_repr = (struct pl_color_repr) {
+        .sys    = PL_COLOR_SYSTEM_RGB,
+        .levels = PL_COLOR_LEVELS_FULL,
+        .alpha  = PL_ALPHA_UNKNOWN,
+    };
+
+    // Make sure the swapchain present mode is supported
+    VkPresentModeKHR *modes = NULL;
+    uint32_t num_modes = 0;
+    VK(vk->GetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, NULL));
+    modes = pl_calloc_ptr(NULL, num_modes, modes);
+    VK(vk->GetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, modes));
+
+    bool supported = false;
+    for (int i = 0; i < num_modes; i++)
+        supported |= (modes[i] == p->protoInfo.presentMode);
+    pl_free_ptr(&modes);
+
+    if (!supported) {
+        PL_WARN(vk, "Requested swap mode unsupported by this device, falling "
+                "back to VK_PRESENT_MODE_FIFO_KHR");
+        p->protoInfo.presentMode = VK_PRESENT_MODE_FIFO_KHR;
+    }
+
+    // Enumerate the supported surface color spaces
+    uint32_t num_formats = 0;
+    VK(vk->GetPhysicalDeviceSurfaceFormatsKHR(vk->physd, p->surf, &num_formats, NULL));
+    PL_ARRAY_RESIZE(sw, p->formats, num_formats);
+    VK(vk->GetPhysicalDeviceSurfaceFormatsKHR(vk->physd, p->surf, &num_formats, p->formats.elem));
+    p->formats.num = num_formats;
+
+    PL_INFO(gpu, "Available surface configurations:");
+    for (int i = 0; i < p->formats.num; i++) {
+        PL_INFO(gpu, "    %d: %-40s %s", i,
+                vk_fmt_name(p->formats.elem[i].format),
+                vk_csp_name(p->formats.elem[i].colorSpace));
+    }
+
+    // Ensure there exists at least some valid renderable surface format
+    struct pl_color_space hint = {0};
+    if (!pick_surf_format(sw, &hint))
+        goto error;
+
+    return sw;
+
+error:
+    pl_free(modes);
+    pl_free(sw);
+    return NULL;
+}
+
+static void vk_sw_destroy(pl_swapchain sw)
+{
+    pl_gpu gpu = sw->gpu;
+    struct priv *p = PL_PRIV(sw);
+    struct vk_ctx *vk = p->vk;
+
+    pl_gpu_flush(gpu);
+    vk_wait_idle(vk);
+
+    // Vulkan offers no way to know when a queue presentation command is done,
+    // leading to spec-mandated undefined behavior when destroying resources
+    // tied to the swapchain. Use an extra `vkQueueWaitIdle` on all of the
+    // queues we may have outstanding presentation calls on, to hopefully
+    // inform the driver that we want to wait until the device is truly idle.
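+    // (Note: a present fence, as introduced by VK_EXT_swapchain_maintenance1,
+    // would provide this guarantee directly, but we can't rely on that
+    // extension being available.)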
+    for (int i = 0; i < vk->pool_graphics->num_queues; i++)
+        vk->QueueWaitIdle(vk->pool_graphics->queues[i]);
+
+    for (int i = 0; i < p->images.num; i++)
+        pl_tex_destroy(gpu, &p->images.elem[i]);
+    for (int i = 0; i < p->sems.num; i++) {
+        vk->DestroySemaphore(vk->dev, p->sems.elem[i].in, PL_VK_ALLOC);
+        vk->DestroySemaphore(vk->dev, p->sems.elem[i].out, PL_VK_ALLOC);
+    }
+
+    vk->DestroySwapchainKHR(vk->dev, p->swapchain, PL_VK_ALLOC);
+    pl_mutex_destroy(&p->lock);
+    pl_free((void *) sw);
+}
+
+static int vk_sw_latency(pl_swapchain sw)
+{
+    struct priv *p = PL_PRIV(sw);
+    return p->swapchain_depth;
+}
+
+static bool update_swapchain_info(struct priv *p, VkSwapchainCreateInfoKHR *info,
+                                  int w, int h)
+{
+    struct vk_ctx *vk = p->vk;
+
+    // Query the supported capabilities and update this struct as needed
+    VkSurfaceCapabilitiesKHR caps = {0};
+    VK(vk->GetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, p->surf, &caps));
+
+    // Check for hidden/invisible window
+    if (!caps.currentExtent.width || !caps.currentExtent.height) {
+        PL_DEBUG(vk, "maxImageExtent reported as 0x0, hidden window? skipping");
+        return false;
+    }
+
+    // Sorted by preference
+    static const struct { VkCompositeAlphaFlagsKHR vk_mode;
+                          enum pl_alpha_mode pl_mode;
+                        } alphaModes[] = {
+        {VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, PL_ALPHA_INDEPENDENT},
+        {VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR,  PL_ALPHA_PREMULTIPLIED},
+        {VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,          PL_ALPHA_UNKNOWN},
+        {VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,         PL_ALPHA_UNKNOWN},
+    };
+
+    for (int i = 0; i < PL_ARRAY_SIZE(alphaModes); i++) {
+        if (caps.supportedCompositeAlpha & alphaModes[i].vk_mode) {
+            info->compositeAlpha = alphaModes[i].vk_mode;
+            p->color_repr.alpha = alphaModes[i].pl_mode;
+            PL_DEBUG(vk, "Requested alpha compositing mode: %s",
+                     vk_alpha_mode(info->compositeAlpha));
+            break;
+        }
+    }
+
+    if (!info->compositeAlpha) {
+        PL_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)",
+               caps.supportedCompositeAlpha);
+        goto error;
+    }
+
+    // Note: We could probably also allow picking a surface transform that
+    // flips the framebuffer and set `pl_swapchain_frame.flipped`, but this
+    // doesn't appear to be necessary for any vulkan implementations.
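+    // (Handling the rotated transforms reported by e.g. mobile compositors
+    // would additionally require swapping the requested width/height, so we
+    // only ever accept the identity/inherit transforms below.)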
+    static const VkSurfaceTransformFlagsKHR rotModes[] = {
+        VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+        VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
+    };
+
+    for (int i = 0; i < PL_ARRAY_SIZE(rotModes); i++) {
+        if (caps.supportedTransforms & rotModes[i]) {
+            info->preTransform = rotModes[i];
+            PL_DEBUG(vk, "Requested surface transform: %s",
+                     vk_surface_transform(info->preTransform));
+            break;
+        }
+    }
+
+    if (!info->preTransform) {
+        PL_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)",
+               caps.supportedTransforms);
+        goto error;
+    }
+
+    // Image count as required
+    PL_DEBUG(vk, "Requested image count: %d (min %d max %d)",
+             (int) info->minImageCount, (int) caps.minImageCount,
+             (int) caps.maxImageCount);
+
+    info->minImageCount = PL_MAX(info->minImageCount, caps.minImageCount);
+    if (caps.maxImageCount)
+        info->minImageCount = PL_MIN(info->minImageCount, caps.maxImageCount);
+
+    PL_DEBUG(vk, "Requested image size: %dx%d (min %dx%d < cur %dx%d < max %dx%d)",
+             w, h, caps.minImageExtent.width, caps.minImageExtent.height,
+             caps.currentExtent.width, caps.currentExtent.height,
+             caps.maxImageExtent.width, caps.maxImageExtent.height);
+
+    // Default the requested size based on the reported extent
+    if (caps.currentExtent.width != 0xFFFFFFFF)
+        w = PL_DEF(w, caps.currentExtent.width);
+    if (caps.currentExtent.height != 0xFFFFFFFF)
+        h = PL_DEF(h, caps.currentExtent.height);
+
+    // Otherwise, re-use the existing size if available
+    w = PL_DEF(w, info->imageExtent.width);
+    h = PL_DEF(h, info->imageExtent.height);
+
+    if (!w || !h) {
+        PL_ERR(vk, "Failed resizing swapchain: unknown size?");
+        goto error;
+    }
+
+    // Clamp the extent based on the supported limits
+    w = PL_CLAMP(w, caps.minImageExtent.width, caps.maxImageExtent.width);
+    h = PL_CLAMP(h, caps.minImageExtent.height, caps.maxImageExtent.height);
+    info->imageExtent = (VkExtent2D) { w, h };
+
+    // We just request whatever makes sense, and let the pl_vk decide what
+    // pl_tex_params that translates to. That said, we still need to intersect
+    // the swapchain usage flags with the format usage flags
+    VkImageUsageFlags req_flags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+                                  VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+    VkImageUsageFlags opt_flags = VK_IMAGE_USAGE_STORAGE_BIT;
+
+    info->imageUsage = caps.supportedUsageFlags & (req_flags | opt_flags);
+    VkFormatProperties fmtprop = {0};
+    vk->GetPhysicalDeviceFormatProperties(vk->physd, info->imageFormat, &fmtprop);
+
+#define CHECK(usage, feature) \
+    if (!((fmtprop.optimalTilingFeatures & VK_FORMAT_FEATURE_##feature##_BIT))) \
+        info->imageUsage &= ~VK_IMAGE_USAGE_##usage##_BIT
+
+    CHECK(COLOR_ATTACHMENT, COLOR_ATTACHMENT);
+    CHECK(TRANSFER_DST, TRANSFER_DST);
+    CHECK(STORAGE, STORAGE_IMAGE);
+
+    if ((info->imageUsage & req_flags) != req_flags) {
+        PL_ERR(vk, "The swapchain doesn't support rendering and blitting!");
+        goto error;
+    }
+
+    return true;
+
+error:
+    return false;
+}
+
+static void destroy_swapchain(struct vk_ctx *vk, void *swapchain)
+{
+    vk->DestroySwapchainKHR(vk->dev, vk_unwrap_handle(swapchain), PL_VK_ALLOC);
+}
+
+static bool vk_sw_recreate(pl_swapchain sw, int w, int h)
+{
+    pl_gpu gpu = sw->gpu;
+    struct priv *p = PL_PRIV(sw);
+    struct vk_ctx *vk = p->vk;
+
+    VkImage *vkimages = NULL;
+    uint32_t num_images = 0;
+
+    if (!update_swapchain_info(p, &p->protoInfo, w, h))
+        return false;
+
+    VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
+#ifdef VK_EXT_full_screen_exclusive
+    // Explicitly disallow full screen exclusive mode if possible
+    static const VkSurfaceFullScreenExclusiveInfoEXT fsinfo = {
+        .sType = VK_STRUCTURE_TYPE_SURFACE_FULL_SCREEN_EXCLUSIVE_INFO_EXT,
+        .fullScreenExclusive = VK_FULL_SCREEN_EXCLUSIVE_DISALLOWED_EXT,
+    };
+    if (vk->AcquireFullScreenExclusiveModeEXT)
+        vk_link_struct(&sinfo, &fsinfo);
+#endif
+
+    p->suboptimal = false;
+    p->needs_recreate = false;
+    p->cur_width = sinfo.imageExtent.width;
+    p->cur_height = sinfo.imageExtent.height;
+
+    PL_DEBUG(sw, "(Re)creating swapchain of size %dx%d",
+             sinfo.imageExtent.width,
+             sinfo.imageExtent.height);
+
+#ifdef PL_HAVE_UNIX
+    if (vk->props.vendorID == VK_VENDOR_ID_NVIDIA) {
+        vk->DeviceWaitIdle(vk->dev);
+        vk_wait_idle(vk);
+    }
+#endif
+
+    // Calling `vkCreateSwapchainKHR` puts sinfo.oldSwapchain into a retired
+    // state whether the call succeeds or not, so we always need to garbage
+    // collect it afterwards - asynchronously as it may still be in use
+    sinfo.oldSwapchain = p->swapchain;
+    p->swapchain = VK_NULL_HANDLE;
+    VkResult res = vk->CreateSwapchainKHR(vk->dev, &sinfo, PL_VK_ALLOC, &p->swapchain);
+    vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, vk_wrap_handle(sinfo.oldSwapchain));
+    PL_VK_ASSERT(res, "vk->CreateSwapchainKHR(...)");
+
+    // Get the new swapchain images
+    VK(vk->GetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, NULL));
+    vkimages = pl_calloc_ptr(NULL, num_images, vkimages);
+    VK(vk->GetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, vkimages));
+
+    for (int i = 0; i < num_images; i++)
+        PL_VK_NAME(IMAGE, vkimages[i], "swapchain");
+
+    // If needed, allocate some more semaphores
+    while (num_images > p->sems.num) {
+        VkSemaphore sem_in = VK_NULL_HANDLE, sem_out = VK_NULL_HANDLE;
+        static const VkSemaphoreCreateInfo seminfo = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        };
+        VK(vk->CreateSemaphore(vk->dev, &seminfo, PL_VK_ALLOC, &sem_in));
+        VK(vk->CreateSemaphore(vk->dev, &seminfo, PL_VK_ALLOC, &sem_out));
+        PL_VK_NAME(SEMAPHORE, sem_in, "swapchain in");
+        PL_VK_NAME(SEMAPHORE, sem_out, "swapchain out");
+
+        PL_ARRAY_APPEND(sw, p->sems, (struct sem_pair) {
+            .in  = sem_in,
+            .out = sem_out,
+        });
+    }
+
+    // Recreate the pl_tex wrappers
+    for (int i = 0; i < p->images.num; i++)
+        pl_tex_destroy(gpu, &p->images.elem[i]);
+    p->images.num = 0;
+
+    for (int i = 0; i < num_images; i++) {
+        const VkExtent2D *ext = &sinfo.imageExtent;
+        pl_tex tex = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
+            .image  = vkimages[i],
+            .width  = ext->width,
+            .height = ext->height,
+            .format = sinfo.imageFormat,
+            .usage  = sinfo.imageUsage,
+        ));
+        if (!tex)
+            goto error;
+        PL_ARRAY_APPEND(sw, p->images, tex);
+    }
+
+    pl_assert(num_images > 0);
+    int bits = 0;
+
+    // The channel with the most bits is probably the most authoritative about
+    // the actual color information (consider e.g. a2bgr10). Slight downside
+    // in that it results in rounding r/b for e.g. rgb565, but we don't pick
+    // surfaces with fewer than 8 bits anyway, so let's not care for now.
+    pl_fmt fmt = p->images.elem[0]->params.format;
+    for (int i = 0; i < fmt->num_components; i++)
+        bits = PL_MAX(bits, fmt->component_depth[i]);
+
+    p->color_repr.bits.sample_depth = bits;
+    p->color_repr.bits.color_depth = bits;
+
+    // Note: `p->color_space.hdr` is (re-)applied by `set_hdr_metadata`
+    map_color_space(sinfo.imageColorSpace, &p->color_space);
+
+    // Forcibly re-apply HDR metadata, bypassing the no-op check
+    struct pl_hdr_metadata metadata = p->hdr_metadata;
+    p->hdr_metadata = pl_hdr_metadata_empty;
+    set_hdr_metadata(p, &metadata);
+
+    pl_free(vkimages);
+    return true;
+
+error:
+    PL_ERR(vk, "Failed (re)creating swapchain!");
+    pl_free(vkimages);
+    vk->DestroySwapchainKHR(vk->dev, p->swapchain, PL_VK_ALLOC);
+    p->swapchain = VK_NULL_HANDLE;
+    p->cur_width = p->cur_height = 0;
+    return false;
+}
+
+static bool vk_sw_start_frame(pl_swapchain sw,
+                              struct pl_swapchain_frame *out_frame)
+{
+    struct priv *p = PL_PRIV(sw);
+    struct vk_ctx *vk = p->vk;
+    pl_mutex_lock(&p->lock);
+
+    bool recreate = !p->swapchain || p->needs_recreate;
+    if (p->suboptimal && !p->params.allow_suboptimal)
+        recreate = true;
+
+    if (recreate && !vk_sw_recreate(sw, 0, 0)) {
+        pl_mutex_unlock(&p->lock);
+        return false;
+    }
+
+    VkSemaphore sem_in = p->sems.elem[p->idx_sems].in;
+    PL_TRACE(vk, "vkAcquireNextImageKHR signals 0x%"PRIx64, (uint64_t) sem_in);
+
+    for (int attempts = 0; attempts < 2; attempts++) {
+        uint32_t imgidx = 0;
+        VkResult res = vk->AcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
+                                               sem_in, VK_NULL_HANDLE, &imgidx);
+
+        switch (res) {
+        case VK_SUBOPTIMAL_KHR:
+            p->suboptimal = true;
+            // fall through
+        case VK_SUCCESS:
+            p->last_imgidx = imgidx;
+            pl_vulkan_release_ex(sw->gpu, pl_vulkan_release_params(
+                .tex       = p->images.elem[imgidx],
+                .layout    = VK_IMAGE_LAYOUT_UNDEFINED,
+                .qf        = VK_QUEUE_FAMILY_IGNORED,
+                .semaphore = { sem_in },
+            ));
+            *out_frame = (struct pl_swapchain_frame) {
+                .fbo = p->images.elem[imgidx],
+                .flipped = false,
+                .color_repr = p->color_repr,
+                .color_space = p->color_space,
+            };
+            // keep lock held
+            return true;
+
+        case VK_ERROR_OUT_OF_DATE_KHR: {
+            // In these cases try recreating the swapchain
+            if (!vk_sw_recreate(sw, 0, 0)) {
+                pl_mutex_unlock(&p->lock);
+                return false;
+            }
+            continue;
+        }
+
+        default:
+            PL_ERR(vk, "Failed acquiring swapchain image: %s", vk_res_str(res));
+            pl_mutex_unlock(&p->lock);
+            return false;
+        }
+    }
+
+    // If we've exhausted the number of attempts to recreate the swapchain,
+    // just give up silently and let the user retry some time later.
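+    // (With two attempts, this amounts to at most one swapchain recreation
+    // per call; hitting this path repeatedly means the surface is changing
+    // faster than we can recreate it.)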
+    pl_mutex_unlock(&p->lock);
+    return false;
+}
+
+static void present_cb(struct priv *p, void *arg)
+{
+    (void) pl_rc_deref(&p->frames_in_flight);
+}
+
+static bool vk_sw_submit_frame(pl_swapchain sw)
+{
+    pl_gpu gpu = sw->gpu;
+    struct priv *p = PL_PRIV(sw);
+    struct vk_ctx *vk = p->vk;
+    pl_assert(p->last_imgidx >= 0);
+    pl_assert(p->swapchain);
+    uint32_t idx = p->last_imgidx;
+    VkSemaphore sem_out = p->sems.elem[p->idx_sems++].out;
+    p->idx_sems %= p->sems.num;
+    p->last_imgidx = -1;
+
+    bool held = pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
+        .tex       = p->images.elem[idx],
+        .layout    = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+        .qf        = VK_QUEUE_FAMILY_IGNORED,
+        .semaphore = { sem_out },
+    ));
+
+    if (!held) {
+        PL_ERR(gpu, "Failed holding swapchain image for presentation");
+        pl_mutex_unlock(&p->lock);
+        return false;
+    }
+
+    struct vk_cmd *cmd = pl_vk_steal_cmd(gpu);
+    if (!cmd) {
+        pl_mutex_unlock(&p->lock);
+        return false;
+    }
+
+    pl_rc_ref(&p->frames_in_flight);
+    vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL);
+    if (!vk_cmd_submit(&cmd)) {
+        pl_mutex_unlock(&p->lock);
+        return false;
+    }
+
+    struct vk_cmdpool *pool = vk->pool_graphics;
+    int qidx = pool->idx_queues;
+    VkQueue queue = pool->queues[qidx];
+
+    vk_rotate_queues(p->vk);
+    vk_malloc_garbage_collect(vk->ma);
+
+    VkPresentInfoKHR pinfo = {
+        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+        .waitSemaphoreCount = 1,
+        .pWaitSemaphores = &sem_out,
+        .swapchainCount = 1,
+        .pSwapchains = &p->swapchain,
+        .pImageIndices = &idx,
+    };
+
+    PL_TRACE(vk, "vkQueuePresentKHR waits on 0x%"PRIx64, (uint64_t) sem_out);
+    vk->lock_queue(vk->queue_ctx, pool->qf, qidx);
+    VkResult res = vk->QueuePresentKHR(queue, &pinfo);
+    vk->unlock_queue(vk->queue_ctx, pool->qf, qidx);
+    pl_mutex_unlock(&p->lock);
+
+    switch (res) {
+    case VK_SUBOPTIMAL_KHR:
+        p->suboptimal = true;
+        // fall through
+    case VK_SUCCESS:
+        return true;
+
+    case VK_ERROR_OUT_OF_DATE_KHR:
+        // We can silently ignore this error, since the next start_frame will
+        // recreate the swapchain automatically.
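+        // (This effectively drops the current frame, but a single dropped
+        // frame during e.g. a window resize is acceptable.)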
+        return true;
+
+    default:
+        PL_ERR(vk, "Failed presenting to queue %p: %s", (void *) queue,
+               vk_res_str(res));
+        return false;
+    }
+}
+
+static void vk_sw_swap_buffers(pl_swapchain sw)
+{
+    struct priv *p = PL_PRIV(sw);
+
+    pl_mutex_lock(&p->lock);
+    while (pl_rc_count(&p->frames_in_flight) >= p->swapchain_depth) {
+        pl_mutex_unlock(&p->lock); // don't hold mutex while blocking
+        vk_poll_commands(p->vk, UINT64_MAX);
+        pl_mutex_lock(&p->lock);
+    }
+    pl_mutex_unlock(&p->lock);
+}
+
+static bool vk_sw_resize(pl_swapchain sw, int *width, int *height)
+{
+    struct priv *p = PL_PRIV(sw);
+    bool ok = true;
+
+    pl_mutex_lock(&p->lock);
+
+    bool width_changed = *width && *width != p->cur_width,
+         height_changed = *height && *height != p->cur_height;
+
+    if (p->suboptimal || p->needs_recreate || width_changed || height_changed)
+        ok = vk_sw_recreate(sw, *width, *height);
+
+    *width = p->cur_width;
+    *height = p->cur_height;
+
+    pl_mutex_unlock(&p->lock);
+    return ok;
+}
+
+static void vk_sw_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp)
+{
+    struct priv *p = PL_PRIV(sw);
+    pl_mutex_lock(&p->lock);
+
+    // This should never fail if the swapchain already exists
+    bool ok = pick_surf_format(sw, csp);
+    set_hdr_metadata(p, &csp->hdr);
+    pl_assert(ok);
+
+    pl_mutex_unlock(&p->lock);
+}
+
+bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw)
+{
+    struct priv *p = PL_PRIV(sw);
+    return p->suboptimal;
+}
+
+static const struct pl_sw_fns vulkan_swapchain = {
+    .destroy         = vk_sw_destroy,
+    .latency         = vk_sw_latency,
+    .resize          = vk_sw_resize,
+    .colorspace_hint = vk_sw_colorspace_hint,
+    .start_frame     = vk_sw_start_frame,
+    .submit_frame    = vk_sw_submit_frame,
+    .swap_buffers    = vk_sw_swap_buffers,
+};
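
For reference, a minimal sketch of how an application might drive this backend
through libplacebo's public swapchain API (the pl_swapchain_* entry points,
dispatched via the pl_sw_fns table above). The should_quit() and render()
callbacks are placeholders supplied by the application, and the
pl_vulkan_swapchain_params() constructor macro is assumed to follow the same
convention as pl_vulkan_wrap_params() used in this file:

    #include <libplacebo/swapchain.h>
    #include <libplacebo/vulkan.h>

    bool run_frame_loop(pl_vulkan plvk, VkSurfaceKHR surf)
    {
        // Dispatches to pl_vulkan_create_swapchain() above
        pl_swapchain sw = pl_vulkan_create_swapchain(plvk, pl_vulkan_swapchain_params(
            .surface      = surf,
            .present_mode = VK_PRESENT_MODE_FIFO_KHR,
        ));
        if (!sw)
            return false;

        while (!should_quit()) { // placeholder exit condition
            struct pl_swapchain_frame frame;
            if (!pl_swapchain_start_frame(sw, &frame))
                continue; // e.g. hidden window; try again later

            render(frame.fbo); // placeholder: draw into frame.fbo

            if (!pl_swapchain_submit_frame(sw))
                break; // irrecoverable error, e.g. device loss

            // Blocks once `swapchain_depth` frames are in flight
            pl_swapchain_swap_buffers(sw);
        }

        pl_swapchain_destroy(&sw);
        return true;
    }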