#include "quakedef.h" #ifdef VKQUAKE #include "vkrenderer.h" #include "gl_draw.h" #include "shader.h" #include "renderque.h" //is anything still using this? extern qboolean vid_isfullscreen; extern cvar_t vk_submissionthread; extern cvar_t vk_debug; extern cvar_t vk_dualqueue; extern cvar_t vk_busywait; extern cvar_t vk_waitfence; extern cvar_t vk_nv_glsl_shader; extern cvar_t vk_khr_get_memory_requirements2; extern cvar_t vk_khr_dedicated_allocation; extern cvar_t vk_khr_push_descriptor; extern cvar_t vk_amd_rasterization_order; extern cvar_t vk_usememorypools; extern cvar_t vid_srgb, vid_vsync, vid_triplebuffer, r_stereo_method, vid_multisample, vid_bpp; void R2D_Console_Resize(void); extern qboolean scr_con_forcedraw; #ifndef MULTITHREAD #define Sys_LockConditional(c) #define Sys_UnlockConditional(c) #endif const char *vklayerlist[] = { #if 1 "VK_LAYER_LUNARG_standard_validation" #else //older versions of the sdk were crashing out on me, // "VK_LAYER_LUNARG_api_dump", "VK_LAYER_LUNARG_device_limits", //"VK_LAYER_LUNARG_draw_state", "VK_LAYER_LUNARG_image", //"VK_LAYER_LUNARG_mem_tracker", "VK_LAYER_LUNARG_object_tracker", "VK_LAYER_LUNARG_param_checker", "VK_LAYER_LUNARG_screenshot", "VK_LAYER_LUNARG_swapchain", "VK_LAYER_GOOGLE_threading", "VK_LAYER_GOOGLE_unique_objects", //"VK_LAYER_LUNARG_vktrace", #endif }; #define vklayercount (vk_debug.ival>1?countof(vklayerlist):0) //code to initialise+destroy vulkan contexts. //this entire file is meant to be platform-agnostic. //the vid code still needs to set up vkGetInstanceProcAddr, and do all the window+input stuff. #ifdef VK_NO_PROTOTYPES #define VKFunc(n) PFN_vk##n vk##n; #ifdef VK_EXT_debug_utils VKFunc(CreateDebugUtilsMessengerEXT) VKFunc(DestroyDebugUtilsMessengerEXT) #endif #ifdef VK_EXT_debug_report VKFunc(CreateDebugReportCallbackEXT) VKFunc(DestroyDebugReportCallbackEXT) #endif VKFuncs #undef VKFunc #endif void VK_Submit_Work(VkCommandBuffer cmdbuf, VkSemaphore semwait, VkPipelineStageFlags semwaitstagemask, VkSemaphore semsignal, VkFence fencesignal, struct vkframe *presentframe, struct vk_fencework *fencedwork); #ifdef MULTITHREAD static int VK_Submit_Thread(void *arg); #endif static void VK_Submit_DoWork(void); static void VK_DestroyRenderPass(void); static void VK_CreateRenderPass(void); static void VK_Shutdown_PostProc(void); struct vulkaninfo_s vk; static struct vk_rendertarg postproc[4]; static unsigned int postproc_buf; static struct vk_rendertarg_cube vk_rt_cubemap; qboolean VK_SCR_GrabBackBuffer(void); #if defined(__linux__) && defined(__GLIBC__) #include #define DOBACKTRACE() \ do { \ void *bt[16]; \ int i, fr = backtrace(bt, countof(bt)); \ char **strings = backtrace_symbols(bt, fr); \ for (i = 0; i < fr; i++) \ if (strings) \ Con_Printf("\t%s\n", strings[i]); \ else \ Con_Printf("\t%p\n", bt[i]); \ free(strings); \ } while(0) #else #define DOBACKTRACE() #endif #ifdef VK_EXT_debug_utils static void DebugSetName(VkObjectType objtype, uint64_t handle, const char *name) { if (vkSetDebugUtilsObjectNameEXT) { VkDebugUtilsObjectNameInfoEXT info = { VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, NULL, objtype, handle, name }; vkSetDebugUtilsObjectNameEXT(vk.device, &info); } } static VkDebugUtilsMessengerEXT vk_debugucallback; char *DebugAnnotObjectToString(VkObjectType t) { switch(t) { case VK_OBJECT_TYPE_UNKNOWN: return "VK_OBJECT_TYPE_UNKNOWN"; case VK_OBJECT_TYPE_INSTANCE: return "VK_OBJECT_TYPE_INSTANCE"; case VK_OBJECT_TYPE_PHYSICAL_DEVICE: return "VK_OBJECT_TYPE_PHYSICAL_DEVICE"; case VK_OBJECT_TYPE_DEVICE: return "VK_OBJECT_TYPE_DEVICE"; case VK_OBJECT_TYPE_QUEUE: return "VK_OBJECT_TYPE_QUEUE"; case VK_OBJECT_TYPE_SEMAPHORE: return "VK_OBJECT_TYPE_SEMAPHORE"; case VK_OBJECT_TYPE_COMMAND_BUFFER: return "VK_OBJECT_TYPE_COMMAND_BUFFER"; case VK_OBJECT_TYPE_FENCE: return "VK_OBJECT_TYPE_FENCE"; case VK_OBJECT_TYPE_DEVICE_MEMORY: return "VK_OBJECT_TYPE_DEVICE_MEMORY"; case VK_OBJECT_TYPE_BUFFER: return "VK_OBJECT_TYPE_BUFFER"; case VK_OBJECT_TYPE_IMAGE: return "VK_OBJECT_TYPE_IMAGE"; case VK_OBJECT_TYPE_EVENT: return "VK_OBJECT_TYPE_EVENT"; case VK_OBJECT_TYPE_QUERY_POOL: return "VK_OBJECT_TYPE_QUERY_POOL"; case VK_OBJECT_TYPE_BUFFER_VIEW: return "VK_OBJECT_TYPE_BUFFER_VIEW"; case VK_OBJECT_TYPE_IMAGE_VIEW: return "VK_OBJECT_TYPE_IMAGE_VIEW"; case VK_OBJECT_TYPE_SHADER_MODULE: return "VK_OBJECT_TYPE_SHADER_MODULE"; case VK_OBJECT_TYPE_PIPELINE_CACHE: return "VK_OBJECT_TYPE_PIPELINE_CACHE"; case VK_OBJECT_TYPE_PIPELINE_LAYOUT: return "VK_OBJECT_TYPE_PIPELINE_LAYOUT"; case VK_OBJECT_TYPE_RENDER_PASS: return "VK_OBJECT_TYPE_RENDER_PASS"; case VK_OBJECT_TYPE_PIPELINE: return "VK_OBJECT_TYPE_PIPELINE"; case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: return "VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT"; case VK_OBJECT_TYPE_SAMPLER: return "VK_OBJECT_TYPE_SAMPLER"; case VK_OBJECT_TYPE_DESCRIPTOR_POOL: return "VK_OBJECT_TYPE_DESCRIPTOR_POOL"; case VK_OBJECT_TYPE_DESCRIPTOR_SET: return "VK_OBJECT_TYPE_DESCRIPTOR_SET"; case VK_OBJECT_TYPE_FRAMEBUFFER: return "VK_OBJECT_TYPE_FRAMEBUFFER"; case VK_OBJECT_TYPE_COMMAND_POOL: return "VK_OBJECT_TYPE_COMMAND_POOL"; case VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION: return "VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION"; case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE: return "VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE"; case VK_OBJECT_TYPE_SURFACE_KHR: return "VK_OBJECT_TYPE_SURFACE_KHR"; case VK_OBJECT_TYPE_SWAPCHAIN_KHR: return "VK_OBJECT_TYPE_SWAPCHAIN_KHR"; case VK_OBJECT_TYPE_DISPLAY_KHR: return "VK_OBJECT_TYPE_DISPLAY_KHR"; case VK_OBJECT_TYPE_DISPLAY_MODE_KHR: return "VK_OBJECT_TYPE_DISPLAY_MODE_KHR"; case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: return "VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT"; case VK_OBJECT_TYPE_OBJECT_TABLE_NVX: return "VK_OBJECT_TYPE_OBJECT_TABLE_NVX"; case VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX: return "VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX"; case VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT: return "VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT"; case VK_OBJECT_TYPE_VALIDATION_CACHE_EXT: return "VK_OBJECT_TYPE_VALIDATION_CACHE_EXT"; case VK_OBJECT_TYPE_RANGE_SIZE: case VK_OBJECT_TYPE_MAX_ENUM: break; } return "UNKNOWNTYPE"; } static VKAPI_ATTR VkBool32 VKAPI_CALL mydebugutilsmessagecallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT*pCallbackData, void* pUserData) { char prefix[64]; int l = 0; //developer level if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { //spam? strcpy(prefix, "VERBOSE:"); l = 2; } else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { //generally stuff like 'object created' strcpy(prefix, "INFO:"); l = 1; } else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) strcpy(prefix, CON_WARNING"WARNING:"); else if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) strcpy(prefix, CON_ERROR "ERROR:"); if (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT) strcat(prefix, "GENERAL"); else { if (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) strcat(prefix, "SPEC"); if (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT) { if (messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) { strcat(prefix, "|"); } strcat(prefix,"PERF"); } } Con_DLPrintf(l, "%s[%d] %s - %s\n", prefix, pCallbackData->messageIdNumber, pCallbackData->pMessageIdName?pCallbackData->pMessageIdName:"", pCallbackData->pMessage); if (pCallbackData->objectCount > 0) { uint32_t object; for(object = 0; object < pCallbackData->objectCount; ++object) Con_DLPrintf(l, " Object[%d] - Type %s, Value %"PRIx64", Name \"%s\"\n", object, DebugAnnotObjectToString(pCallbackData->pObjects[object].objectType), pCallbackData->pObjects[object].objectHandle, pCallbackData->pObjects[object].pObjectName); } if (pCallbackData->cmdBufLabelCount > 0) { uint32_t label; for (label = 0; label < pCallbackData->cmdBufLabelCount; ++label) Con_DLPrintf(l, " Label[%d] - %s { %f, %f, %f, %f}\n", label, pCallbackData->pCmdBufLabels[label].pLabelName, pCallbackData->pCmdBufLabels[label].color[0], pCallbackData->pCmdBufLabels[label].color[1], pCallbackData->pCmdBufLabels[label].color[2], pCallbackData->pCmdBufLabels[label].color[3]); } return false; } #else #define DebugSetName(objtype,handle,name) #endif #ifdef VK_EXT_debug_report static VkDebugReportCallbackEXT vk_debugcallback; static VkBool32 VKAPI_PTR mydebugreportcallback( VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char* pLayerPrefix, const char* pMessage, void* pUserData) { if (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) { Con_Printf("ERR: %s: %s\n", pLayerPrefix, pMessage); // DOBACKTRACE(); } else if (flags & VK_DEBUG_REPORT_WARNING_BIT_EXT) { if (!strncmp(pMessage, "Additional bits in Source accessMask", 36) && strstr(pMessage, "VK_IMAGE_LAYOUT_UNDEFINED")) return false; //I don't give a fuck. undefined can be used to change layouts on a texture that already exists too. Con_Printf("WARN: %s: %s\n", pLayerPrefix, pMessage); DOBACKTRACE(); } else if (flags & VK_DEBUG_REPORT_DEBUG_BIT_EXT) { Con_DPrintf("DBG: %s: %s\n", pLayerPrefix, pMessage); // DOBACKTRACE(); } else if (flags & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) { #ifdef _WIN32 // OutputDebugString(va("INF: %s\n", pMessage)); #else Con_Printf("INF: %s: %s\n", pLayerPrefix, pMessage); // DOBACKTRACE(); #endif } else if (flags & VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT) { Con_Printf("PERF: %s: %s\n", pLayerPrefix, pMessage); DOBACKTRACE(); } else { Con_Printf("OTHER: %s: %s\n", pLayerPrefix, pMessage); DOBACKTRACE(); } return false; } #endif //typeBits is some vulkan requirement thing (like textures must be device-local). //requirements_mask are things that the engine may require (like host-visible). //note that there is absolutely no guarentee that hardware requirements will match what the host needs. //thus you may need to use staging. uint32_t vk_find_memory_try(uint32_t typeBits, VkFlags requirements_mask) { uint32_t i; for (i = 0; i < 32; i++) { if ((typeBits & 1) == 1) { if ((vk.memory_properties.memoryTypes[i].propertyFlags & requirements_mask) == requirements_mask) return i; } typeBits >>= 1; } return ~0u; } uint32_t vk_find_memory_require(uint32_t typeBits, VkFlags requirements_mask) { uint32_t ret = vk_find_memory_try(typeBits, requirements_mask); if (ret == ~0) Sys_Error("Unable to find suitable vulkan memory pool\n"); return ret; } void VK_DestroyVkTexture(vk_image_t *img) { if (!img) return; if (img->sampler) vkDestroySampler(vk.device, img->sampler, vkallocationcb); if (img->view) vkDestroyImageView(vk.device, img->view, vkallocationcb); if (img->image) vkDestroyImage(vk.device, img->image, vkallocationcb); VK_ReleasePoolMemory(&img->mem); } static void VK_DestroyVkTexture_Delayed(void *w) { VK_DestroyVkTexture(w); } static void VK_DestroySwapChain(void) { uint32_t i; #ifdef MULTITHREAD if (vk.submitcondition) { Sys_LockConditional(vk.submitcondition); vk.neednewswapchain = true; Sys_ConditionSignal(vk.submitcondition); Sys_UnlockConditional(vk.submitcondition); } if (vk.submitthread) { Sys_WaitOnThread(vk.submitthread); vk.submitthread = NULL; } #endif while (vk.work) { Sys_LockConditional(vk.submitcondition); VK_Submit_DoWork(); Sys_UnlockConditional(vk.submitcondition); } if (vk.dopresent) vk.dopresent(NULL); if (vk.device) vkDeviceWaitIdle(vk.device); /*while (vk.aquirenext < vk.aquirelast) { VkWarnAssert(vkWaitForFences(vk.device, 1, &vk.acquirefences[vk.aquirenext%ACQUIRELIMIT], VK_FALSE, UINT64_MAX)); vk.aquirenext++; }*/ VK_FencedCheck(); while(vk.frameendjobs) { //we've fully synced the gpu now, we can clean up any resources that were pending but not assigned yet. struct vk_frameend *job = vk.frameendjobs; vk.frameendjobs = job->next; job->FrameEnded(job+1); Z_Free(job); } if (vk.frame) { vk.frame->next = vk.unusedframes; vk.unusedframes = vk.frame; vk.frame = NULL; } for (i = 0; i < vk.backbuf_count; i++) { //swapchain stuff if (vk.backbufs[i].framebuffer) vkDestroyFramebuffer(vk.device, vk.backbufs[i].framebuffer, vkallocationcb); vk.backbufs[i].framebuffer = VK_NULL_HANDLE; if (vk.backbufs[i].colour.view) vkDestroyImageView(vk.device, vk.backbufs[i].colour.view, vkallocationcb); vk.backbufs[i].colour.view = VK_NULL_HANDLE; VK_DestroyVkTexture(&vk.backbufs[i].depth); VK_DestroyVkTexture(&vk.backbufs[i].mscolour); } if (vk.dopresent) vk.dopresent(NULL); while (vk.aquirenext < vk.aquirelast) { if (vk.acquirefences[vk.aquirenext%ACQUIRELIMIT]) VkWarnAssert(vkWaitForFences(vk.device, 1, &vk.acquirefences[vk.aquirenext%ACQUIRELIMIT], VK_FALSE, UINT64_MAX)); vk.aquirenext++; } if (vk.device) vkDeviceWaitIdle(vk.device); for (i = 0; i < ACQUIRELIMIT; i++) { if (vk.acquirefences[i]) vkDestroyFence(vk.device, vk.acquirefences[i], vkallocationcb); vk.acquirefences[i] = VK_NULL_HANDLE; } while(vk.unusedframes) { struct vkframe *frame = vk.unusedframes; vk.unusedframes = frame->next; VKBE_ShutdownFramePools(frame); vkFreeCommandBuffers(vk.device, vk.cmdpool, frame->maxcbufs, frame->cbufs); BZ_Free(frame->cbufs); vkDestroyFence(vk.device, frame->finishedfence, vkallocationcb); Z_Free(frame); } if (vk.swapchain) { vkDestroySwapchainKHR(vk.device, vk.swapchain, vkallocationcb); vk.swapchain = VK_NULL_HANDLE; } if (vk.backbufs) free(vk.backbufs); vk.backbufs = NULL; vk.backbuf_count = 0; } static qboolean VK_CreateSwapChain(void) { qboolean reloadshaders = false; uint32_t fmtcount; VkSurfaceFormatKHR *surffmts; uint32_t presentmodes; VkPresentModeKHR *presentmode; VkSurfaceCapabilitiesKHR surfcaps; VkSwapchainCreateInfoKHR swapinfo = {VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR}; uint32_t i, curpri, preaquirecount; VkSwapchainKHR newvkswapchain; VkImage *images; VkDeviceMemory *memories; VkImageView attachments[3]; VkFramebufferCreateInfo fb_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO}; VkSampleCountFlagBits oldms; VkFormat oldformat = vk.backbufformat; VkFormat olddepthformat = vk.depthformat; vk.dopresent(NULL); //make sure they're all pushed through. vid_vsync.modified = false; vid_triplebuffer.modified = false; vid_srgb.modified = false; vk_submissionthread.modified = false; vk_waitfence.modified = false; vid_multisample.modified = false; vk.triplebuffer = vid_triplebuffer.ival; vk.vsync = vid_vsync.ival; if (!vk.khr_swapchain) { //headless if (vk.swapchain || vk.backbuf_count) VK_DestroySwapChain(); vk.backbufformat = ((vid.flags&VID_SRGBAWARE)||vid_srgb.ival)?VK_FORMAT_B8G8R8A8_SRGB:VK_FORMAT_B8G8R8A8_UNORM; vk.backbuf_count = 4; swapinfo.imageExtent.width = vid.pixelwidth; swapinfo.imageExtent.height = vid.pixelheight; images = malloc(sizeof(VkImage)*vk.backbuf_count); memset(images, 0, sizeof(VkImage)*vk.backbuf_count); memories = malloc(sizeof(VkDeviceMemory)*vk.backbuf_count); memset(memories, 0, sizeof(VkDeviceMemory)*vk.backbuf_count); vk.aquirelast = vk.aquirenext = 0; for (i = 0; i < ACQUIRELIMIT; i++) { if (1) { VkFenceCreateInfo fci = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; fci.flags = VK_FENCE_CREATE_SIGNALED_BIT; VkAssert(vkCreateFence(vk.device,&fci,vkallocationcb,&vk.acquirefences[i])); vk.acquiresemaphores[i] = VK_NULL_HANDLE; } else { VkSemaphoreCreateInfo sci = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; VkAssert(vkCreateSemaphore(vk.device, &sci, vkallocationcb, &vk.acquiresemaphores[i])); vk.acquirefences[i] = VK_NULL_HANDLE; } vk.acquirebufferidx[vk.aquirelast%ACQUIRELIMIT] = vk.aquirelast%vk.backbuf_count; vk.aquirelast++; } for (i = 0; i < vk.backbuf_count; i++) { VkMemoryRequirements mem_reqs; VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; VkMemoryDedicatedAllocateInfoKHR khr_mdai = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR}; VkImageCreateInfo ici = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; ici.flags = 0; ici.imageType = VK_IMAGE_TYPE_2D; ici.format = vk.backbufformat; ici.extent.width = vid.pixelwidth; ici.extent.height = vid.pixelheight; ici.extent.depth = 1; ici.mipLevels = 1; ici.arrayLayers = 1; ici.samples = VK_SAMPLE_COUNT_1_BIT; ici.tiling = VK_IMAGE_TILING_OPTIMAL; ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ici.queueFamilyIndexCount = 0; ici.pQueueFamilyIndices = NULL; ici.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAssert(vkCreateImage(vk.device, &ici, vkallocationcb, &images[i])); DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)images[i], "backbuffer"); vkGetImageMemoryRequirements(vk.device, images[i], &mem_reqs); memAllocInfo.allocationSize = mem_reqs.size; memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); if (memAllocInfo.memoryTypeIndex == ~0) memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); if (memAllocInfo.memoryTypeIndex == ~0) memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); if (memAllocInfo.memoryTypeIndex == ~0) memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, 0); if (vk.khr_dedicated_allocation) { khr_mdai.pNext = memAllocInfo.pNext; khr_mdai.image = images[i]; memAllocInfo.pNext = &khr_mdai; } VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &memories[i])); VkAssert(vkBindImageMemory(vk.device, images[i], memories[i], 0)); } } else { //using vulkan's presentation engine. int BOOST_UNORM, BOOST_SNORM, BOOST_SRGB, BOOST_UFLOAT, BOOST_SFLOAT; if (vid_srgb.ival > 1) { //favour float formats, then srgb, then unorms BOOST_UNORM = 0; BOOST_SNORM = 0; BOOST_SRGB = 128; BOOST_UFLOAT = 256; BOOST_SFLOAT = 256; } else if (vid_srgb.ival) { BOOST_UNORM = 0; BOOST_SNORM = 0; BOOST_SRGB = 256; BOOST_UFLOAT = 128; BOOST_SFLOAT = 128; } else { BOOST_UNORM = 256; BOOST_SNORM = 256; BOOST_SRGB = 0; BOOST_UFLOAT = 128; BOOST_SFLOAT = 128; } VkAssert(vkGetPhysicalDeviceSurfaceFormatsKHR(vk.gpu, vk.surface, &fmtcount, NULL)); surffmts = malloc(sizeof(VkSurfaceFormatKHR)*fmtcount); VkAssert(vkGetPhysicalDeviceSurfaceFormatsKHR(vk.gpu, vk.surface, &fmtcount, surffmts)); VkAssert(vkGetPhysicalDeviceSurfacePresentModesKHR(vk.gpu, vk.surface, &presentmodes, NULL)); presentmode = malloc(sizeof(VkPresentModeKHR)*presentmodes); VkAssert(vkGetPhysicalDeviceSurfacePresentModesKHR(vk.gpu, vk.surface, &presentmodes, presentmode)); vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk.gpu, vk.surface, &surfcaps); swapinfo.surface = vk.surface; swapinfo.minImageCount = surfcaps.minImageCount+vk.triplebuffer; if (swapinfo.minImageCount > surfcaps.maxImageCount) swapinfo.minImageCount = surfcaps.maxImageCount; if (swapinfo.minImageCount < surfcaps.minImageCount) swapinfo.minImageCount = surfcaps.minImageCount; swapinfo.imageExtent.width = surfcaps.currentExtent.width; swapinfo.imageExtent.height = surfcaps.currentExtent.height; swapinfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT; swapinfo.preTransform = surfcaps.currentTransform;//VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; if (surfcaps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) swapinfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; else if (surfcaps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR) { swapinfo.compositeAlpha = VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; Con_Printf(CON_WARNING"Vulkan swapchain using composite alpha premultiplied\n"); } else if (surfcaps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR) { swapinfo.compositeAlpha = VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR; Con_Printf(CON_WARNING"Vulkan swapchain using composite alpha postmultiplied\n"); } else { swapinfo.compositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; //erk? Con_Printf(CON_WARNING"composite alpha inherit\n"); } swapinfo.imageArrayLayers = /*(r_stereo_method.ival==1)?2:*/1; swapinfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; swapinfo.queueFamilyIndexCount = 0; swapinfo.pQueueFamilyIndices = NULL; swapinfo.oldSwapchain = vk.swapchain; swapinfo.clipped = vid_isfullscreen?VK_FALSE:VK_TRUE; //allow fragment shaders to be skipped on parts that are obscured by another window. screenshots might get weird, so use proper captures if required/automagic. swapinfo.presentMode = VK_PRESENT_MODE_FIFO_KHR; //support is guarenteed by spec, in theory. for (i = 0, curpri = 0; i < presentmodes; i++) { uint32_t priority = 0; switch(presentmode[i]) { default://ignore it if we don't know it. break; //this is awkward. normally we use vsync<0 to allow tearing-with-vsync, but that leaves us with a problem as far as what 0 should signify - tearing or not. //if we're using mailbox then we could instead discard the command buffers and skip rendering of the actual scenes. //we could have our submission thread wait some time period after the last vswap (ie: before the next) before submitting the command. //this could reduce gpu load at higher resolutions without lying too much about cpu usage... case VK_PRESENT_MODE_IMMEDIATE_KHR: priority = (vk.vsync?0:2) + 2; //for most quake players, latency trumps tearing. break; case VK_PRESENT_MODE_MAILBOX_KHR: priority = (vk.vsync?0:2) + 1; break; case VK_PRESENT_MODE_FIFO_KHR: priority = (vk.vsync?2:0) + 1; break; case VK_PRESENT_MODE_FIFO_RELAXED_KHR: priority = (vk.vsync?2:0) + 2; //strict vsync results in weird juddering if rtlights etc caues framerates to drop below the refreshrate. and nvidia just suck with vsync, so I'm not taking any chances. break; } if (priority > curpri) { curpri = priority; swapinfo.presentMode = presentmode[i]; } } if (!vk.vsync && swapinfo.presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR) if (!vk.swapchain) //only warn on vid_restart, otherwise its annoying when resizing. Con_Printf("Warning: vulkan graphics driver does not support VK_PRESENT_MODE_IMMEDIATE_KHR.\n"); vk.srgbcapable = false; swapinfo.imageColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; swapinfo.imageFormat = VK_FORMAT_UNDEFINED; for (i = 0, curpri = 0; i < fmtcount; i++) { uint32_t priority = 0; switch(surffmts[i].format) { case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_R8G8B8A8_UNORM: case VK_FORMAT_A8B8G8R8_UNORM_PACK32: priority = ((vid_bpp.ival>=24)?24:11)+BOOST_UNORM; break; case VK_FORMAT_B8G8R8A8_SNORM: case VK_FORMAT_R8G8B8A8_SNORM: case VK_FORMAT_A8B8G8R8_SNORM_PACK32: priority = ((vid_bpp.ival>=21)?21:2)+BOOST_SNORM; break; case VK_FORMAT_B8G8R8A8_SRGB: case VK_FORMAT_R8G8B8A8_SRGB: case VK_FORMAT_A8B8G8R8_SRGB_PACK32: priority = ((vid_bpp.ival>=24)?24:11)+BOOST_SRGB; vk.srgbcapable = true; break; case VK_FORMAT_A2B10G10R10_UNORM_PACK32: case VK_FORMAT_A2R10G10B10_UNORM_PACK32: priority = ((vid_bpp.ival==30)?30:10)+BOOST_UNORM; break; case VK_FORMAT_B10G11R11_UFLOAT_PACK32: priority = ((vid_srgb.ival>=3||vid_bpp.ival==32)?32:11)+BOOST_UFLOAT; break; case VK_FORMAT_R16G16B16A16_SFLOAT: //16bit per-channel formats priority = ((vid_srgb.ival>=3||vid_bpp.ival>=48)?48:9)+BOOST_SFLOAT; break; case VK_FORMAT_R16G16B16A16_UNORM: priority = ((vid_srgb.ival>=3||vid_bpp.ival>=48)?48:9)+BOOST_UNORM; break; case VK_FORMAT_R16G16B16A16_SNORM: priority = ((vid_srgb.ival>=3||vid_bpp.ival>=48)?48:9)+BOOST_SFLOAT; break; case VK_FORMAT_R32G32B32A32_SFLOAT: //32bit per-channel formats priority = ((vid_bpp.ival>=47)?96:8)+BOOST_SFLOAT; break; case VK_FORMAT_B5G6R5_UNORM_PACK16: case VK_FORMAT_R5G6B5_UNORM_PACK16: priority = 16+BOOST_UNORM; break; case VK_FORMAT_R4G4B4A4_UNORM_PACK16: case VK_FORMAT_B4G4R4A4_UNORM_PACK16: priority = 12+BOOST_UNORM; break; case VK_FORMAT_A1R5G5B5_UNORM_PACK16: case VK_FORMAT_R5G5B5A1_UNORM_PACK16: case VK_FORMAT_B5G5R5A1_UNORM_PACK16: priority = 15+BOOST_UNORM; break; default: //no idea, use as lowest priority. priority = 1; break; } if (surffmts[i].colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR && //sRGB surffmts[i].colorSpace == VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT && //scRGB surffmts[i].colorSpace == VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT) //linear vaugely like sRGB priority += 512; //always favour supported colour spaces. if (priority > curpri) { curpri = priority; swapinfo.imageColorSpace = surffmts[i].colorSpace; swapinfo.imageFormat = surffmts[i].format; } } if (swapinfo.imageFormat == VK_FORMAT_UNDEFINED) { //if we found this format then it means the drivers don't really give a damn. pick a real format. if (vid_srgb.ival > 1 && swapinfo.imageColorSpace == VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT) swapinfo.imageFormat = VK_FORMAT_R16G16B16A16_SFLOAT; else if (vid_srgb.ival) swapinfo.imageFormat = VK_FORMAT_R8G8B8A8_SRGB; else swapinfo.imageFormat = VK_FORMAT_R8G8B8A8_UNORM; } if (vk.backbufformat != swapinfo.imageFormat) { VK_DestroyRenderPass(); reloadshaders = true; } vk.backbufformat = swapinfo.imageFormat; //VK_COLORSPACE_SRGB_NONLINEAR means the presentation engine will interpret the image as SRGB whether its a UNORM or SRGB format or not. //an SRGB format JUST means rendering converts linear->srgb and does not apply to the presentation engine. vid.flags &= ~VID_SRGB_FB; if (swapinfo.imageColorSpace == VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT) vid.flags |= VID_SRGB_FB_LINEAR; else { switch(vk.backbufformat) { case VK_FORMAT_R8G8B8_SRGB: case VK_FORMAT_B8G8R8_SRGB: case VK_FORMAT_B8G8R8A8_SRGB: case VK_FORMAT_R8G8B8A8_SRGB: case VK_FORMAT_A8B8G8R8_SRGB_PACK32: vid.flags |= VID_SRGB_FB_LINEAR; break; default: break; //non-srgb (or compressed) } } free(presentmode); free(surffmts); newvkswapchain = VK_NULL_HANDLE; VkAssert(vkCreateSwapchainKHR(vk.device, &swapinfo, vkallocationcb, &newvkswapchain)); if (!newvkswapchain) return false; if (vk.swapchain) { VK_DestroySwapChain(); } vk.swapchain = newvkswapchain; VkAssert(vkGetSwapchainImagesKHR(vk.device, vk.swapchain, &vk.backbuf_count, NULL)); images = malloc(sizeof(VkImage)*vk.backbuf_count); memories = NULL; VkAssert(vkGetSwapchainImagesKHR(vk.device, vk.swapchain, &vk.backbuf_count, images)); vk.aquirelast = vk.aquirenext = 0; for (i = 0; i < ACQUIRELIMIT; i++) { if (vk_waitfence.ival || !*vk_waitfence.string) { VkFenceCreateInfo fci = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; VkAssert(vkCreateFence(vk.device,&fci,vkallocationcb,&vk.acquirefences[i])); vk.acquiresemaphores[i] = VK_NULL_HANDLE; } else { VkSemaphoreCreateInfo sci = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; VkAssert(vkCreateSemaphore(vk.device, &sci, vkallocationcb, &vk.acquiresemaphores[i])); vk.acquirefences[i] = VK_NULL_HANDLE; } } if (!vk_submissionthread.value && *vk_submissionthread.string) preaquirecount = 1; else preaquirecount = vk.backbuf_count; /*-1 to hide any weird thread issues*/ while (vk.aquirelast < ACQUIRELIMIT-1 && vk.aquirelast < preaquirecount && vk.aquirelast <= vk.backbuf_count-surfcaps.minImageCount) { VkAssert(vkAcquireNextImageKHR(vk.device, vk.swapchain, UINT64_MAX, vk.acquiresemaphores[vk.aquirelast%ACQUIRELIMIT], vk.acquirefences[vk.aquirelast%ACQUIRELIMIT], &vk.acquirebufferidx[vk.aquirelast%ACQUIRELIMIT])); vk.aquirelast++; } } oldms = vk.multisamplebits; vk.multisamplebits = VK_SAMPLE_COUNT_1_BIT; #ifdef _DEBUG if (vid_multisample.ival>1) { VkSampleCountFlags fl = vk.limits.framebufferColorSampleCounts & vk.limits.framebufferDepthSampleCounts; Con_Printf("Warning: vulkan multisample does not work with rtlights or render targets etc etc\n"); for (i = 1; i < 30; i++) if ((fl & (1<sampler) vkDestroySampler(vk.device, img->sampler, vkallocationcb); if (flags & IF_LINEAR) { lmsampinfo.minFilter = lmsampinfo.magFilter = VK_FILTER_LINEAR; lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; } else if (flags & IF_NEAREST) { lmsampinfo.minFilter = lmsampinfo.magFilter = VK_FILTER_NEAREST; lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; } else { int *filter = (flags & IF_UIPIC)?vk.filterpic:vk.filtermip; if (filter[0]) lmsampinfo.minFilter = VK_FILTER_LINEAR; else lmsampinfo.minFilter = VK_FILTER_NEAREST; if (filter[1]) lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; else lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; if (filter[2]) lmsampinfo.magFilter = VK_FILTER_LINEAR; else lmsampinfo.magFilter = VK_FILTER_NEAREST; } lmsampinfo.addressModeU = clamptoedge?VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:VK_SAMPLER_ADDRESS_MODE_REPEAT; lmsampinfo.addressModeV = clamptoedge?VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:VK_SAMPLER_ADDRESS_MODE_REPEAT; lmsampinfo.addressModeW = clamptoedge?VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:VK_SAMPLER_ADDRESS_MODE_REPEAT; lmsampinfo.mipLodBias = 0.0; lmsampinfo.anisotropyEnable = (flags & IF_NEAREST)?false:(vk.max_anistophy > 1); lmsampinfo.maxAnisotropy = vk.max_anistophy; lmsampinfo.compareEnable = VK_FALSE; lmsampinfo.compareOp = VK_COMPARE_OP_NEVER; lmsampinfo.minLod = vk.mipcap[0]; //this isn't quite right lmsampinfo.maxLod = vk.mipcap[1]; lmsampinfo.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; lmsampinfo.unnormalizedCoordinates = VK_FALSE; VkAssert(vkCreateSampler(vk.device, &lmsampinfo, NULL, &img->sampler)); } static void VK_DestroySampler(void *w) { VkSampler s = *(VkSampler*)w; vkDestroySampler(vk.device, s, vkallocationcb); } void VK_UpdateFiltering(image_t *imagelist, int filtermip[3], int filterpic[3], int mipcap[2], float anis) { uint32_t i; for (i = 0; i < countof(vk.filtermip); i++) vk.filtermip[i] = filtermip[i]; for (i = 0; i < countof(vk.filterpic); i++) vk.filterpic[i] = filterpic[i]; for (i = 0; i < countof(vk.mipcap); i++) vk.mipcap[i] = mipcap[i]; vk.max_anistophy = bound(1.0, anis, vk.max_anistophy_limit); while(imagelist) { if (imagelist->vkimage) { if (imagelist->vkimage->sampler) { //the sampler might still be in use, so clean it up at the end of the frame. //all this to avoid syncing all the queues... VK_AtFrameEnd(VK_DestroySampler, &imagelist->vkimage->sampler, sizeof(imagelist->vkimage->sampler)); imagelist->vkimage->sampler = VK_NULL_HANDLE; } VK_CreateSampler(imagelist->flags, imagelist->vkimage); } imagelist = imagelist->next; } } qboolean VK_AllocatePoolMemory(uint32_t pooltype, VkDeviceSize memsize, VkDeviceSize poolalignment, vk_poolmem_t *mem) { struct vk_mempool_s *p; VkDeviceSize pad; if (!vk_usememorypools.ival) return false; // if (memsize > 1024*1024*4) // return false; for (p = vk.mempools; p; p = p->next) { if (p->memtype == pooltype) { if (p->memoryoffset + poolalignment + memsize < p->memorysize) break; } } if (!p) { VkMemoryAllocateInfo poolai = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; p = Z_Malloc(sizeof(*p)); p->memorysize = poolai.allocationSize = 512*1024*1024; //lets just allocate big... p->memtype = poolai.memoryTypeIndex = pooltype; if (VK_SUCCESS != vkAllocateMemory(vk.device, &poolai, vkallocationcb, &p->memory)) { //out of memory? oh well, a smaller dedicated allocation might still work. Z_Free(p); return false; } p->next = vk.mempools; vk.mempools = p; } pad = ((p->memoryoffset+poolalignment-1)&~(poolalignment-1)) - p->memoryoffset; p->memoryoffset = (p->memoryoffset+poolalignment-1)&~(poolalignment-1); p->gaps += pad; mem->offset = p->memoryoffset; mem->size = memsize; //FIXME: we have no way to deal with gaps due to alignment mem->memory = p->memory; mem->pool = p; p->memoryoffset += memsize; return true; } void VK_ReleasePoolMemory(vk_poolmem_t *mem) { if (mem->pool) { //FIXME: track power-of-two holes? mem->pool->gaps += mem->size; mem->pool = NULL; mem->memory = VK_NULL_HANDLE; } else if (mem->memory) { vkFreeMemory(vk.device, mem->memory, vkallocationcb); mem->memory = VK_NULL_HANDLE; } } //does NOT bind. //image memory is NOT expected to be host-visible. you'll get what vulkan gives you. qboolean VK_AllocateImageMemory(VkImage image, qboolean dedicated, vk_poolmem_t *mem) { uint32_t pooltype; VkMemoryRequirements2KHR mem_reqs2 = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR}; if (!dedicated && vk.khr_get_memory_requirements2) { VkImageMemoryRequirementsInfo2KHR imri = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR}; VkMemoryDedicatedRequirementsKHR mdr = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR}; imri.image = image; if (vk.khr_dedicated_allocation) mem_reqs2.pNext = &mdr; //chain the result struct vkGetImageMemoryRequirements2KHR(vk.device, &imri, &mem_reqs2); //and now we know if it should be dedicated or not. dedicated |= mdr.prefersDedicatedAllocation || mdr.requiresDedicatedAllocation; } else vkGetImageMemoryRequirements(vk.device, image, &mem_reqs2.memoryRequirements); pooltype = vk_find_memory_try(mem_reqs2.memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); if (pooltype == ~0) pooltype = vk_find_memory_require(mem_reqs2.memoryRequirements.memoryTypeBits, 0); if (!dedicated && VK_AllocatePoolMemory(pooltype, mem_reqs2.memoryRequirements.size, mem_reqs2.memoryRequirements.alignment, mem)) return true; //got a shared allocation. else { //make it dedicated one way or another. VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; VkMemoryDedicatedAllocateInfoKHR khr_mdai = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR}; VkResult err; //shouldn't really happen, but just in case... mem_reqs2.memoryRequirements.size = max(1,mem_reqs2.memoryRequirements.size); memAllocInfo.allocationSize = mem_reqs2.memoryRequirements.size; memAllocInfo.memoryTypeIndex = pooltype; if (vk.khr_dedicated_allocation) { khr_mdai.image = image; khr_mdai.pNext = memAllocInfo.pNext; memAllocInfo.pNext = &khr_mdai; } mem->pool = NULL; mem->offset = 0; mem->size = mem_reqs2.memoryRequirements.size; mem->memory = VK_NULL_HANDLE; err = vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &mem->memory); if (err != VK_SUCCESS) return false; return true; } } qboolean VK_AllocateBindImageMemory(vk_image_t *image, qboolean dedicated) { if (VK_AllocateImageMemory(image->image, dedicated, &image->mem)) { VkAssert(vkBindImageMemory(vk.device, image->image, image->mem.memory, image->mem.offset)); return true; } return false; //out of memory? } vk_image_t VK_CreateTexture2DArray(uint32_t width, uint32_t height, uint32_t layers, uint32_t mips, uploadfmt_t encoding, unsigned int type, qboolean rendertarget, const char *debugname) { vk_image_t ret; VkImageViewCreateInfo viewInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; VkImageCreateInfo ici = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; VkFormat format = VK_FORMAT_UNDEFINED;; ret.width = width; ret.height = height; ret.layers = layers; ret.mipcount = mips; ret.encoding = encoding; ret.type = type; ret.layout = VK_IMAGE_LAYOUT_UNDEFINED; //vulkan expresses packed formats in terms of native endian (if big-endian, then everything makes sense), non-packed formats are expressed in byte order (consistent with big-endian). //PTI formats are less well-defined... if ((int)encoding < 0) format = -(int)encoding; else switch(encoding) { //16bit formats. case PTI_RGB565: format = VK_FORMAT_R5G6B5_UNORM_PACK16; break; case PTI_RGBA4444: format = VK_FORMAT_R4G4B4A4_UNORM_PACK16; break; case PTI_ARGB4444: /*format = VK_FORMAT_A4R4G4B4_UNORM_PACK16;*/ break; case PTI_RGBA5551: format = VK_FORMAT_R5G5B5A1_UNORM_PACK16; break; case PTI_ARGB1555: format = VK_FORMAT_A1R5G5B5_UNORM_PACK16; break; //float formats case PTI_RGBA16F: format = VK_FORMAT_R16G16B16A16_SFLOAT; break; case PTI_RGBA32F: format = VK_FORMAT_R32G32B32A32_SFLOAT; break; //weird formats case PTI_R8: format = VK_FORMAT_R8_UNORM; break; case PTI_RG8: format = VK_FORMAT_R8G8_UNORM; break; case PTI_R8_SNORM: format = VK_FORMAT_R8_SNORM; break; case PTI_RG8_SNORM: format = VK_FORMAT_R8G8_SNORM; break; case PTI_A2BGR10: format = VK_FORMAT_A2B10G10R10_UNORM_PACK32; break; case PTI_E5BGR9: format = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32; break; //swizzled/legacy formats case PTI_L8: format = VK_FORMAT_R8_UNORM; break; case PTI_L8A8: format = VK_FORMAT_R8G8_UNORM; break; case PTI_L8_SRGB: format = VK_FORMAT_R8_SRGB; break; case PTI_L8A8_SRGB: /*unsupportable*/ break; //compressed formats case PTI_BC1_RGB: format = VK_FORMAT_BC1_RGB_UNORM_BLOCK; break; case PTI_BC1_RGB_SRGB: format = VK_FORMAT_BC1_RGB_SRGB_BLOCK; break; case PTI_BC1_RGBA: format = VK_FORMAT_BC1_RGBA_UNORM_BLOCK; break; case PTI_BC1_RGBA_SRGB: format = VK_FORMAT_BC1_RGBA_SRGB_BLOCK; break; case PTI_BC2_RGBA: format = VK_FORMAT_BC2_UNORM_BLOCK; break; case PTI_BC2_RGBA_SRGB: format = VK_FORMAT_BC2_SRGB_BLOCK; break; case PTI_BC3_RGBA: format = VK_FORMAT_BC3_UNORM_BLOCK; break; case PTI_BC3_RGBA_SRGB: format = VK_FORMAT_BC3_SRGB_BLOCK; break; case PTI_BC4_R8: format = VK_FORMAT_BC4_UNORM_BLOCK; break; case PTI_BC4_R8_SNORM: format = VK_FORMAT_BC4_SNORM_BLOCK; break; case PTI_BC5_RG8: format = VK_FORMAT_BC5_UNORM_BLOCK; break; case PTI_BC5_RG8_SNORM: format = VK_FORMAT_BC5_SNORM_BLOCK; break; case PTI_BC6_RGB_UFLOAT: format = VK_FORMAT_BC6H_UFLOAT_BLOCK; break; case PTI_BC6_RGB_SFLOAT: format = VK_FORMAT_BC6H_SFLOAT_BLOCK; break; case PTI_BC7_RGBA: format = VK_FORMAT_BC7_UNORM_BLOCK; break; case PTI_BC7_RGBA_SRGB: format = VK_FORMAT_BC7_SRGB_BLOCK; break; case PTI_ETC1_RGB8: format = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK; break; //vulkan doesn't support etc1, but etc2 is a superset so its all okay. case PTI_ETC2_RGB8: format = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK; break; case PTI_ETC2_RGB8_SRGB: format = VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK; break; case PTI_ETC2_RGB8A1: format = VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK; break; case PTI_ETC2_RGB8A1_SRGB: format = VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK; break; case PTI_ETC2_RGB8A8: format = VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; break; case PTI_ETC2_RGB8A8_SRGB: format = VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK; break; case PTI_EAC_R11: format = VK_FORMAT_EAC_R11_UNORM_BLOCK; break; case PTI_EAC_R11_SNORM: format = VK_FORMAT_EAC_R11_SNORM_BLOCK; break; case PTI_EAC_RG11: format = VK_FORMAT_EAC_R11G11_UNORM_BLOCK; break; case PTI_EAC_RG11_SNORM: format = VK_FORMAT_EAC_R11G11_SNORM_BLOCK; break; case PTI_ASTC_4X4: format = VK_FORMAT_ASTC_4x4_UNORM_BLOCK; break; case PTI_ASTC_4X4_SRGB: format = VK_FORMAT_ASTC_4x4_SRGB_BLOCK; break; case PTI_ASTC_5X4: format = VK_FORMAT_ASTC_5x4_UNORM_BLOCK; break; case PTI_ASTC_5X4_SRGB: format = VK_FORMAT_ASTC_5x4_SRGB_BLOCK; break; case PTI_ASTC_5X5: format = VK_FORMAT_ASTC_5x5_UNORM_BLOCK; break; case PTI_ASTC_5X5_SRGB: format = VK_FORMAT_ASTC_5x5_SRGB_BLOCK; break; case PTI_ASTC_6X5: format = VK_FORMAT_ASTC_6x5_UNORM_BLOCK; break; case PTI_ASTC_6X5_SRGB: format = VK_FORMAT_ASTC_6x5_SRGB_BLOCK; break; case PTI_ASTC_6X6: format = VK_FORMAT_ASTC_6x6_UNORM_BLOCK; break; case PTI_ASTC_6X6_SRGB: format = VK_FORMAT_ASTC_6x6_SRGB_BLOCK; break; case PTI_ASTC_8X5: format = VK_FORMAT_ASTC_8x5_UNORM_BLOCK; break; case PTI_ASTC_8X5_SRGB: format = VK_FORMAT_ASTC_8x5_SRGB_BLOCK; break; case PTI_ASTC_8X6: format = VK_FORMAT_ASTC_8x6_UNORM_BLOCK; break; case PTI_ASTC_8X6_SRGB: format = VK_FORMAT_ASTC_8x6_SRGB_BLOCK; break; case PTI_ASTC_8X8: format = VK_FORMAT_ASTC_8x8_UNORM_BLOCK; break; case PTI_ASTC_8X8_SRGB: format = VK_FORMAT_ASTC_8x8_SRGB_BLOCK; break; case PTI_ASTC_10X5: format = VK_FORMAT_ASTC_10x5_UNORM_BLOCK; break; case PTI_ASTC_10X5_SRGB: format = VK_FORMAT_ASTC_10x5_SRGB_BLOCK; break; case PTI_ASTC_10X6: format = VK_FORMAT_ASTC_10x6_UNORM_BLOCK; break; case PTI_ASTC_10X6_SRGB: format = VK_FORMAT_ASTC_10x6_SRGB_BLOCK; break; case PTI_ASTC_10X8: format = VK_FORMAT_ASTC_10x8_UNORM_BLOCK; break; case PTI_ASTC_10X8_SRGB: format = VK_FORMAT_ASTC_10x8_SRGB_BLOCK; break; case PTI_ASTC_10X10: format = VK_FORMAT_ASTC_10x10_UNORM_BLOCK; break; case PTI_ASTC_10X10_SRGB: format = VK_FORMAT_ASTC_10x10_SRGB_BLOCK; break; case PTI_ASTC_12X10: format = VK_FORMAT_ASTC_12x10_UNORM_BLOCK; break; case PTI_ASTC_12X10_SRGB: format = VK_FORMAT_ASTC_12x10_SRGB_BLOCK; break; case PTI_ASTC_12X12: format = VK_FORMAT_ASTC_12x12_UNORM_BLOCK; break; case PTI_ASTC_12X12_SRGB: format = VK_FORMAT_ASTC_12x12_SRGB_BLOCK; break; //depth formats case PTI_DEPTH16: format = VK_FORMAT_D16_UNORM; break; case PTI_DEPTH24: format = VK_FORMAT_X8_D24_UNORM_PACK32; break; case PTI_DEPTH32: format = VK_FORMAT_D32_SFLOAT; break; case PTI_DEPTH24_8: format = VK_FORMAT_D24_UNORM_S8_UINT; break; //srgb formats case PTI_BGRA8_SRGB: case PTI_BGRX8_SRGB: format = VK_FORMAT_B8G8R8A8_SRGB; break; case PTI_RGBA8_SRGB: case PTI_RGBX8_SRGB: format = VK_FORMAT_R8G8B8A8_SRGB; break; //standard formats case PTI_BGRA8: case PTI_BGRX8: format = VK_FORMAT_B8G8R8A8_UNORM; break; case PTI_RGBA8: case PTI_RGBX8: format = VK_FORMAT_R8G8B8A8_UNORM; break; //misaligned formats case PTI_RGB8: format = VK_FORMAT_R8G8B8_UNORM; break; case PTI_BGR8: format = VK_FORMAT_B8G8R8_UNORM; break; //unsupported 'formats' case PTI_MAX: #ifdef FTE_TARGET_WEB case PTI_WHOLEFILE: #endif case PTI_EMULATED: break; } if (format == VK_FORMAT_UNDEFINED) //no default case means warnings for unsupported formats above. Sys_Error("VK_CreateTexture2DArray: Unrecognised image encoding: %u\n", encoding); ici.flags = (ret.type==PTI_CUBEMAP)?VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT:0; ici.imageType = VK_IMAGE_TYPE_2D; ici.format = format; ici.extent.width = width; ici.extent.height = height; ici.extent.depth = 1; ici.mipLevels = mips; ici.arrayLayers = layers; ici.samples = VK_SAMPLE_COUNT_1_BIT; ici.tiling = VK_IMAGE_TILING_OPTIMAL; ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT|(rendertarget?0:VK_IMAGE_USAGE_TRANSFER_DST_BIT); ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ici.queueFamilyIndexCount = 0; ici.pQueueFamilyIndices = NULL; ici.initialLayout = ret.layout; VkAssert(vkCreateImage(vk.device, &ici, vkallocationcb, &ret.image)); DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)ret.image, debugname); ret.view = VK_NULL_HANDLE; ret.sampler = VK_NULL_HANDLE; if (!VK_AllocateBindImageMemory(&ret, false)) return ret; //oom? viewInfo.flags = 0; viewInfo.image = ret.image; viewInfo.viewType = (ret.type==PTI_CUBEMAP)?VK_IMAGE_VIEW_TYPE_CUBE:VK_IMAGE_VIEW_TYPE_2D; viewInfo.format = format; switch(encoding) { //formats that explicitly drop the alpha case PTI_BC1_RGB: case PTI_BC1_RGB_SRGB: case PTI_RGBX8: case PTI_RGBX8_SRGB: case PTI_BGRX8: case PTI_BGRX8_SRGB: viewInfo.components.r = VK_COMPONENT_SWIZZLE_R; viewInfo.components.g = VK_COMPONENT_SWIZZLE_G; viewInfo.components.b = VK_COMPONENT_SWIZZLE_B; viewInfo.components.a = VK_COMPONENT_SWIZZLE_ONE; break; case PTI_L8: //must be an R8 texture viewInfo.components.r = VK_COMPONENT_SWIZZLE_R; viewInfo.components.g = VK_COMPONENT_SWIZZLE_R; viewInfo.components.b = VK_COMPONENT_SWIZZLE_R; viewInfo.components.a = VK_COMPONENT_SWIZZLE_ONE; break; case PTI_L8A8: //must be an RG8 texture viewInfo.components.r = VK_COMPONENT_SWIZZLE_R; viewInfo.components.g = VK_COMPONENT_SWIZZLE_R; viewInfo.components.b = VK_COMPONENT_SWIZZLE_R; viewInfo.components.a = VK_COMPONENT_SWIZZLE_G; break; default: viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; break; } viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; viewInfo.subresourceRange.baseMipLevel = 0; viewInfo.subresourceRange.levelCount = mips; viewInfo.subresourceRange.baseArrayLayer = 0; viewInfo.subresourceRange.layerCount = layers; VkAssert(vkCreateImageView(vk.device, &viewInfo, NULL, &ret.view)); return ret; } void set_image_layout(VkCommandBuffer cmd, VkImage image, VkImageAspectFlags aspectMask, VkImageLayout old_image_layout, VkAccessFlags srcaccess, VkPipelineStageFlagBits srcstagemask, VkImageLayout new_image_layout, VkAccessFlags dstaccess, VkPipelineStageFlagBits dststagemask) { //images have weird layout representations. //we need to use a side-effect of memory barriers in order to convert from one layout to another, so that we can actually use the image. VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; imgbarrier.pNext = NULL; imgbarrier.srcAccessMask = srcaccess; imgbarrier.dstAccessMask = dstaccess; imgbarrier.oldLayout = old_image_layout; imgbarrier.newLayout = new_image_layout; imgbarrier.image = image; imgbarrier.subresourceRange.aspectMask = aspectMask; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = 1; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = 1; imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; /* if (new_image_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) // Make sure anything that was copying from this image has completed imgbarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; else if (new_image_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) // Make sure anything that was copying from this image has completed imgbarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; else if (new_image_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) imgbarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; else if (new_image_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) imgbarrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; else if (new_image_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) // Make sure any Copy or CPU writes to image are flushed imgbarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; if (old_image_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) imgbarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; else if (old_image_layout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) imgbarrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; else if (old_image_layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) imgbarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; */ vkCmdPipelineBarrier(cmd, srcstagemask, dststagemask, 0, 0, NULL, 0, NULL, 1, &imgbarrier); } void VK_FencedCheck(void) { while(vk.fencework) { Sys_LockConditional(vk.submitcondition); if (VK_SUCCESS == vkGetFenceStatus(vk.device, vk.fencework->fence)) { struct vk_fencework *w; w = vk.fencework; vk.fencework = w->next; if (!vk.fencework) vk.fencework_last = NULL; Sys_UnlockConditional(vk.submitcondition); if (w->Passed) w->Passed(w); if (w->cbuf) vkFreeCommandBuffers(vk.device, vk.cmdpool, 1, &w->cbuf); if (w->fence) vkDestroyFence(vk.device, w->fence, vkallocationcb); Z_Free(w); continue; } Sys_UnlockConditional(vk.submitcondition); break; } } //allocate and begin a commandbuffer so we can do the copies void *VK_FencedBegin(void (*passed)(void *work), size_t worksize) { struct vk_fencework *w = BZ_Malloc(worksize?worksize:sizeof(*w)); VkCommandBufferAllocateInfo cbai = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; VkCommandBufferInheritanceInfo cmdinh = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO}; VkCommandBufferBeginInfo cmdinf = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; cbai.commandPool = vk.cmdpool; cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; cbai.commandBufferCount = 1; VkAssert(vkAllocateCommandBuffers(vk.device, &cbai, &w->cbuf)); cmdinf.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; cmdinf.pInheritanceInfo = &cmdinh; vkBeginCommandBuffer(w->cbuf, &cmdinf); w->Passed = passed; w->next = NULL; return w; } //end+submit a commandbuffer, and set up a fence so we know when its complete. this is not within the context of any frame, so make sure any textures are safe to rewrite early... //completion can be signalled before the current frame finishes, so watch out for that too. void VK_FencedSubmit(void *work) { struct vk_fencework *w = work; VkFenceCreateInfo fenceinfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; if (w->cbuf) vkEndCommandBuffer(w->cbuf); //check if we can release anything yet. VK_FencedCheck(); //FIXME: this seems to be an excessively expensive function. vkCreateFence(vk.device, &fenceinfo, vkallocationcb, &w->fence); VK_Submit_Work(w->cbuf, VK_NULL_HANDLE, 0, VK_NULL_HANDLE, w->fence, NULL, w); } void VK_FencedSync(void *work) { struct vk_fencework *w = work; VK_FencedSubmit(w); #ifdef MULTITHREAD //okay, this is crazy, but it ensures that the work was submitted BEFORE the WaitForFence call. //we should probably come up with a better sync method. if (vk.submitthread) { qboolean nnsc = vk.neednewswapchain; vk.neednewswapchain = true; Sys_LockConditional(vk.submitcondition); //annoying, but required for it to be reliable with respect to other things. Sys_ConditionSignal(vk.submitcondition); Sys_UnlockConditional(vk.submitcondition); Sys_WaitOnThread(vk.submitthread); vk.submitthread = NULL; while (vk.work) { Sys_LockConditional(vk.submitcondition); VK_Submit_DoWork(); Sys_UnlockConditional(vk.submitcondition); } //we know all work is synced now... vk.neednewswapchain = nnsc; vk.submitthread = Sys_CreateThread("vksubmission", VK_Submit_Thread, NULL, THREADP_HIGHEST, 0); } #endif //fixme: waiting for the fence while it may still be getting created by the worker is unsafe. vkWaitForFences(vk.device, 1, &w->fence, VK_FALSE, UINT64_MAX); } //called to schedule the release of a resource that may be referenced by an active command buffer. //the command buffer in question may even have not yet been submitted yet. void *VK_AtFrameEnd(void (*frameended)(void *work), void *workdata, size_t worksize) { struct vk_frameend *w = Z_Malloc(sizeof(*w) + worksize); w->FrameEnded = frameended; w->next = vk.frameendjobs; vk.frameendjobs = w; if (workdata) memcpy(w+1, workdata, worksize); return w+1; } struct texturefence { struct vk_fencework w; int mips; VkBuffer stagingbuffer; VkDeviceMemory stagingmemory; }; static void VK_TextureLoaded(void *ctx) { struct texturefence *w = ctx; vkDestroyBuffer(vk.device, w->stagingbuffer, vkallocationcb); vkFreeMemory(vk.device, w->stagingmemory, vkallocationcb); } qboolean VK_LoadTextureMips (texid_t tex, const struct pendingtextureinfo *mips) { VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; VkMemoryRequirements mem_reqs; VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; void *mapdata; struct texturefence *fence; VkCommandBuffer vkloadcmd; vk_image_t target; uint32_t i; uint32_t blockwidth, blockheight; uint32_t blockbytes; uint32_t layers; uint32_t mipcount = mips->mipcount; if (mips->type != PTI_2D && mips->type != PTI_CUBEMAP)// && mips->type != PTI_2D_ARRAY) return false; if (!mipcount || mips->mip[0].width == 0 || mips->mip[0].height == 0) return false; layers = (mips->type == PTI_CUBEMAP)?6:1; layers *= mips->mip[0].depth; if (layers == 1 && mipcount > 1) { //npot mipmapped textures are awkward. //vulkan floors. for (i = 1; i < mipcount; i++) { if (mips->mip[i].width != max(1,(mips->mip[i-1].width>>1)) || mips->mip[i].height != max(1,(mips->mip[i-1].height>>1))) { //okay, this mip looks like it was sized wrongly. this can easily happen with dds files. mipcount = i; break; } } } Image_BlockSizeForEncoding(mips->encoding, &blockbytes, &blockwidth, &blockheight); fence = VK_FencedBegin(VK_TextureLoaded, sizeof(*fence)); fence->mips = mipcount; vkloadcmd = fence->w.cbuf; //create our target image if (tex->vkimage) { if (tex->vkimage->width != mips->mip[0].width || tex->vkimage->height != mips->mip[0].height || tex->vkimage->layers != layers || tex->vkimage->mipcount != mipcount || tex->vkimage->encoding != mips->encoding || tex->vkimage->type != mips->type) { VK_AtFrameEnd(VK_DestroyVkTexture_Delayed, tex->vkimage, sizeof(*tex->vkimage)); // vkDeviceWaitIdle(vk.device); //erk, we can't cope with a commandbuffer poking the texture while things happen // VK_FencedCheck(); // VK_DestroyVkTexture(tex->vkimage); Z_Free(tex->vkimage); tex->vkimage = NULL; } } if (tex->vkimage) { target = *tex->vkimage; //can reuse it Z_Free(tex->vkimage); //we're meant to be replacing the entire thing, so we can just transition from undefined here // set_image_layout(vkloadcmd, target.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT); { //images have weird layout representations. //we need to use a side-effect of memory barriers in order to convert from one layout to another, so that we can actually use the image. VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; imgbarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; imgbarrier.newLayout = target.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; imgbarrier.image = target.image; imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = mipcount/layers; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = layers; imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; imgbarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vkCmdPipelineBarrier(vkloadcmd, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier); } } else { target = VK_CreateTexture2DArray(mips->mip[0].width, mips->mip[0].height, layers, mipcount/layers, mips->encoding, mips->type, !!(tex->flags&IF_RENDERTARGET), tex->ident); if (target.mem.memory == VK_NULL_HANDLE) { VK_DestroyVkTexture(&target); return false; //the alloc failed? can't copy to that which does not exist. } { //images have weird layout representations. //we need to use a side-effect of memory barriers in order to convert from one layout to another, so that we can actually use the image. VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; imgbarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; imgbarrier.newLayout = target.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; imgbarrier.image = target.image; imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = mipcount/layers; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = layers; imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.srcAccessMask = 0; imgbarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; vkCmdPipelineBarrier(vkloadcmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier); } } //figure out how big our staging buffer needs to be bci.size = 0; for (i = 0; i < mipcount; i++) { uint32_t blockswidth = (mips->mip[i].width+blockwidth-1) / blockwidth; uint32_t blocksheight = (mips->mip[i].height+blockheight-1) / blockheight; bci.size += blockswidth*blocksheight*blockbytes; } bci.flags = 0; bci.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT; bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; bci.queueFamilyIndexCount = 0; bci.pQueueFamilyIndices = NULL; //FIXME: nvidia's vkCreateBuffer ends up calling NtYieldExecution. //which is basically a waste of time, and its hurting framerates. //create+map the staging buffer VkAssert(vkCreateBuffer(vk.device, &bci, vkallocationcb, &fence->stagingbuffer)); vkGetBufferMemoryRequirements(vk.device, fence->stagingbuffer, &mem_reqs); memAllocInfo.allocationSize = mem_reqs.size; memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); if (VK_SUCCESS != vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &fence->stagingmemory)) { VK_FencedSubmit(fence); return false; //some sort of oom error? } VkAssert(vkBindBufferMemory(vk.device, fence->stagingbuffer, fence->stagingmemory, 0)); VkAssert(vkMapMemory(vk.device, fence->stagingmemory, 0, bci.size, 0, &mapdata)); if (!mapdata) Sys_Error("Unable to map staging image\n"); bci.size = 0; for (i = 0; i < mipcount; i++) { VkBufferImageCopy region; //figure out the number of 'blocks' in the image. //for non-compressed formats this is just the width directly. //for compressed formats (ie: s3tc/dxt) we need to round up to deal with npot. uint32_t blockswidth = (mips->mip[i].width+blockwidth-1) / blockwidth; uint32_t blocksheight = (mips->mip[i].height+blockheight-1) / blockheight; if (mips->mip[i].data) memcpy((char*)mapdata + bci.size, (char*)mips->mip[i].data, blockswidth*blockbytes*blocksheight); else memset((char*)mapdata + bci.size, 0, blockswidth*blockbytes*blocksheight); //queue up a buffer->image copy for this mip region.bufferOffset = bci.size; region.bufferRowLength = blockswidth*blockwidth; region.bufferImageHeight = blocksheight*blockheight; region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; region.imageSubresource.mipLevel = i%(mipcount/layers); region.imageSubresource.baseArrayLayer = i/(mipcount/layers); region.imageSubresource.layerCount = 1; region.imageOffset.x = 0; region.imageOffset.y = 0; region.imageOffset.z = 0; region.imageExtent.width = mips->mip[i].width;//blockswidth*blockwidth; region.imageExtent.height = mips->mip[i].height;//blocksheight*blockheight; region.imageExtent.depth = 1; vkCmdCopyBufferToImage(vkloadcmd, fence->stagingbuffer, target.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); bci.size += blockswidth*blocksheight*blockbytes; } vkUnmapMemory(vk.device, fence->stagingmemory); //layouts are annoying. and weird. { //images have weird layout representations. //we need to use a side-effect of memory barriers in order to convert from one layout to another, so that we can actually use the image. VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; imgbarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; imgbarrier.newLayout = target.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; imgbarrier.image = target.image; imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = mipcount/layers; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = layers; imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; imgbarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; vkCmdPipelineBarrier(vkloadcmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier); } VK_FencedSubmit(fence); //FIXME: should probably reuse these samplers. if (!target.sampler) VK_CreateSampler(tex->flags, &target); tex->vkdescriptor = VK_NULL_HANDLE; tex->vkimage = Z_Malloc(sizeof(*tex->vkimage)); *tex->vkimage = target; return true; } void VK_DestroyTexture (texid_t tex) { if (tex->vkimage) { VK_DestroyVkTexture(tex->vkimage); Z_Free(tex->vkimage); tex->vkimage = NULL; } tex->vkdescriptor = VK_NULL_HANDLE; } void VK_R_Init (void) { } void VK_R_DeInit (void) { R_GAliasFlushSkinCache(true); Surf_DeInit(); VK_Shutdown_PostProc(); VK_DestroySwapChain(); VKBE_Shutdown(); Shader_Shutdown(); Image_Shutdown(); } void VK_SetupViewPortProjection(qboolean flipy) { float fov_x, fov_y; float fovv_x, fovv_y; AngleVectors (r_refdef.viewangles, vpn, vright, vup); VectorCopy (r_refdef.vieworg, r_origin); fov_x = r_refdef.fov_x;//+sin(cl.time)*5; fov_y = r_refdef.fov_y;//-sin(cl.time+1)*5; fovv_x = r_refdef.fovv_x; fovv_y = r_refdef.fovv_y; if ((r_refdef.flags & RDF_UNDERWATER) && !(r_refdef.flags & RDF_WATERWARP)) { fov_x *= 1 + (((sin(cl.time * 4.7) + 1) * 0.015) * r_waterwarp.value); fov_y *= 1 + (((sin(cl.time * 3.0) + 1) * 0.015) * r_waterwarp.value); fovv_x *= 1 + (((sin(cl.time * 4.7) + 1) * 0.015) * r_waterwarp.value); fovv_y *= 1 + (((sin(cl.time * 3.0) + 1) * 0.015) * r_waterwarp.value); } // screenaspect = (float)r_refdef.vrect.width/r_refdef.vrect.height; /*view matrix*/ if (flipy) //mimic gl and give bottom-up { vec3_t down; VectorNegate(vup, down); VectorCopy(down, vup); Matrix4x4_CM_ModelViewMatrixFromAxis(r_refdef.m_view, vpn, vright, down, r_refdef.vieworg); r_refdef.flipcull = SHADER_CULL_FRONT | SHADER_CULL_BACK; } else { Matrix4x4_CM_ModelViewMatrixFromAxis(r_refdef.m_view, vpn, vright, vup, r_refdef.vieworg); r_refdef.flipcull = 0; } if (r_refdef.maxdist) { Matrix4x4_CM_Projection_Far(r_refdef.m_projection_std, fov_x, fov_y, r_refdef.mindist, r_refdef.maxdist, false); Matrix4x4_CM_Projection_Far(r_refdef.m_projection_view, fovv_x, fovv_y, r_refdef.mindist, r_refdef.maxdist, false); } else { Matrix4x4_CM_Projection_Inf(r_refdef.m_projection_std, fov_x, fov_y, r_refdef.mindist, false); Matrix4x4_CM_Projection_Inf(r_refdef.m_projection_view, fovv_x, fovv_y, r_refdef.mindist, false); } r_refdef.m_projection_view[2+4*0] *= 0.333; r_refdef.m_projection_view[2+4*1] *= 0.333; r_refdef.m_projection_view[2+4*2] *= 0.333; r_refdef.m_projection_view[2+4*3] *= 0.333; } void VK_Set2D(void) { vid.fbvwidth = vid.width; vid.fbvheight = vid.height; vid.fbpwidth = vid.pixelwidth; vid.fbpheight = vid.pixelheight; r_refdef.pxrect.x = 0; r_refdef.pxrect.y = 0; r_refdef.pxrect.width = vid.fbpwidth; r_refdef.pxrect.height = vid.fbpheight; r_refdef.pxrect.maxheight = vid.pixelheight; /* { VkClearDepthStencilValue val; VkImageSubresourceRange range; val.depth = 1; val.stencil = 0; range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; range.baseArrayLayer = 0; range.baseMipLevel = 0; range.layerCount = 1; range.levelCount = 1; vkCmdClearDepthStencilImage(vk.frame->cbuf, vk.depthbuf.image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &val, 1, &range); } */ /* vkCmdEndRenderPass(vk.frame->cbuf); { VkRenderPassBeginInfo rpiinfo = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; VkClearValue clearvalues[1]; clearvalues[0].depthStencil.depth = 1.0; clearvalues[0].depthStencil.stencil = 0; rpiinfo.renderPass = vk.renderpass[1]; rpiinfo.renderArea.offset.x = r_refdef.pxrect.x; rpiinfo.renderArea.offset.y = r_refdef.pxrect.y; rpiinfo.renderArea.extent.width = r_refdef.pxrect.width; rpiinfo.renderArea.extent.height = r_refdef.pxrect.height; rpiinfo.framebuffer = vk.frame->backbuf->framebuffer; rpiinfo.clearValueCount = 1; rpiinfo.pClearValues = clearvalues; vkCmdBeginRenderPass(vk.frame->cbuf, &rpiinfo, VK_SUBPASS_CONTENTS_INLINE); } */ { VkViewport vp[1]; VkRect2D scissor[1]; vp[0].x = r_refdef.pxrect.x; vp[0].y = r_refdef.pxrect.y; vp[0].width = r_refdef.pxrect.width; vp[0].height = r_refdef.pxrect.height; vp[0].minDepth = 0.0; vp[0].maxDepth = 1.0; scissor[0].offset.x = r_refdef.pxrect.x; scissor[0].offset.y = r_refdef.pxrect.y; scissor[0].extent.width = r_refdef.pxrect.width; scissor[0].extent.height = r_refdef.pxrect.height; vkCmdSetViewport(vk.rendertarg->cbuf, 0, countof(vp), vp); vkCmdSetScissor(vk.rendertarg->cbuf, 0, countof(scissor), scissor); } VKBE_Set2D(true); if (0) Matrix4x4_CM_Orthographic(r_refdef.m_projection_std, 0, vid.fbvwidth, 0, vid.fbvheight, -99999, 99999); else Matrix4x4_CM_Orthographic(r_refdef.m_projection_std, 0, vid.fbvwidth, vid.fbvheight, 0, -99999, 99999); Matrix4x4_Identity(r_refdef.m_view); BE_SelectEntity(&r_worldentity); } static void VK_Shutdown_PostProc(void) { unsigned int i; for (i = 0; i < countof(postproc); i++) VKBE_RT_Gen(&postproc[i], 0, 0, true, RT_IMAGEFLAGS); vk.scenepp_waterwarp = NULL; vk.scenepp_antialias = NULL; VK_R_BloomShutdown(); } static void VK_Init_PostProc(void) { texid_t scenepp_texture_warp, scenepp_texture_edge; //this block liberated from the opengl code { #define PP_WARP_TEX_SIZE 64 #define PP_AMP_TEX_SIZE 64 #define PP_AMP_TEX_BORDER 4 int i, x, y; unsigned char pp_warp_tex[PP_WARP_TEX_SIZE*PP_WARP_TEX_SIZE*4]; unsigned char pp_edge_tex[PP_AMP_TEX_SIZE*PP_AMP_TEX_SIZE*4]; // scenepp_postproc_cube = r_nulltex; // TEXASSIGN(sceneblur_texture, Image_CreateTexture("***postprocess_blur***", NULL, 0)); TEXASSIGN(scenepp_texture_warp, Image_CreateTexture("***postprocess_warp***", NULL, IF_NOMIPMAP|IF_NOGAMMA|IF_LINEAR)); TEXASSIGN(scenepp_texture_edge, Image_CreateTexture("***postprocess_edge***", NULL, IF_NOMIPMAP|IF_NOGAMMA|IF_LINEAR)); // init warp texture - this specifies offset in for (y=0; y PP_AMP_TEX_SIZE - PP_AMP_TEX_BORDER) { fx = (PP_AMP_TEX_SIZE - (float)x) / PP_AMP_TEX_BORDER; } if (y < PP_AMP_TEX_BORDER) { fy = (float)y / PP_AMP_TEX_BORDER; } if (y > PP_AMP_TEX_SIZE - PP_AMP_TEX_BORDER) { fy = (PP_AMP_TEX_SIZE - (float)y) / PP_AMP_TEX_BORDER; } //avoid any sudden changes. fx=sin(fx*M_PI*0.5); fy=sin(fy*M_PI*0.5); //lame fx = fy = min(fx, fy); pp_edge_tex[i ] = fx * 255; pp_edge_tex[i+1] = fy * 255; pp_edge_tex[i+2] = 0; pp_edge_tex[i+3] = 0xff; } } Image_Upload(scenepp_texture_edge, TF_RGBX32, pp_edge_tex, NULL, PP_AMP_TEX_SIZE, PP_AMP_TEX_SIZE, IF_LINEAR|IF_NOMIPMAP|IF_NOGAMMA); } vk.scenepp_waterwarp = R_RegisterShader("waterwarp", SUF_NONE, "{\n" "program underwaterwarp\n" "{\n" "map $sourcecolour\n" "}\n" "{\n" "map $upperoverlay\n" "}\n" "{\n" "map $loweroverlay\n" "}\n" "}\n" ); vk.scenepp_waterwarp->defaulttextures->upperoverlay = scenepp_texture_warp; vk.scenepp_waterwarp->defaulttextures->loweroverlay = scenepp_texture_edge; vk.scenepp_antialias = R_RegisterShader("fte_ppantialias", 0, "{\n" "program fxaa\n" "{\n" "map $sourcecolour\n" "}\n" "}\n" ); } static qboolean VK_R_RenderScene_Cubemap(struct vk_rendertarg *fb) { int cmapsize = 512; int i; static vec3_t ang[6] = { {0, -90, 0}, {0, 90, 0}, {90, 0, 0}, {-90, 0, 0}, {0, 0, 0}, {0, -180, 0} }; vec3_t saveang; vec3_t saveorg; vrect_t vrect; pxrect_t prect; extern cvar_t ffov; shader_t *shader; int facemask; extern cvar_t r_projection; int osm; struct vk_rendertarg_cube *rtc = &vk_rt_cubemap; if (!*ffov.string || !strcmp(ffov.string, "0")) { if (ffov.vec4[0] != scr_fov.value) { ffov.value = ffov.vec4[0] = scr_fov.value; Shader_NeedReload(false); //gah! } } facemask = 0; switch(r_projection.ival) { default: //invalid. return false; case PROJ_STEREOGRAPHIC: shader = R_RegisterShader("postproc_stereographic", SUF_NONE, "{\n" "program postproc_stereographic\n" "{\n" "map $sourcecube\n" "}\n" "}\n" ); facemask |= 1<<4; /*front view*/ if (ffov.value > 70) { facemask |= (1<<0) | (1<<1); /*side/top*/ if (ffov.value > 85) facemask |= (1<<2) | (1<<3); /*bottom views*/ if (ffov.value > 300) facemask |= 1<<5; /*back view*/ } break; case PROJ_FISHEYE: shader = R_RegisterShader("postproc_fisheye", SUF_NONE, "{\n" "program postproc_fisheye\n" "{\n" "map $sourcecube\n" "}\n" "}\n" ); //fisheye view sees up to a full sphere facemask |= 1<<4; /*front view*/ if (ffov.value > 77) facemask |= (1<<0) | (1<<1) | (1<<2) | (1<<3); /*side/top/bottom views*/ if (ffov.value > 270) facemask |= 1<<5; /*back view*/ break; case PROJ_PANORAMA: shader = R_RegisterShader("postproc_panorama", SUF_NONE, "{\n" "program postproc_panorama\n" "{\n" "map $sourcecube\n" "}\n" "}\n" ); //panoramic view needs at most the four sides facemask |= 1<<4; /*front view*/ if (ffov.value > 90) { facemask |= (1<<0) | (1<<1); /*side views*/ if (ffov.value > 270) facemask |= 1<<5; /*back view*/ } facemask = 0x3f; break; case PROJ_LAEA: shader = R_RegisterShader("postproc_laea", SUF_NONE, "{\n" "program postproc_laea\n" "{\n" "map $sourcecube\n" "}\n" "}\n" ); facemask |= 1<<4; /*front view*/ if (ffov.value > 90) { facemask |= (1<<0) | (1<<1) | (1<<2) | (1<<3); /*side/top/bottom views*/ if (ffov.value > 270) facemask |= 1<<5; /*back view*/ } break; case PROJ_EQUIRECTANGULAR: shader = R_RegisterShader("postproc_equirectangular", SUF_NONE, "{\n" "program postproc_equirectangular\n" "{\n" "map $sourcecube\n" "}\n" "}\n" ); facemask = 0x3f; #if 0 facemask |= 1<<4; /*front view*/ if (ffov.value > 90) { facemask |= (1<<0) | (1<<1) | (1<<2) | (1<<3); /*side/top/bottom views*/ if (ffov.value > 270) facemask |= 1<<5; /*back view*/ } #endif break; } if (!shader || !shader->prog) return false; //erk. shader failed. //FIXME: we should be able to rotate the view vrect = r_refdef.vrect; prect = r_refdef.pxrect; // prect.x = (vrect.x * vid.pixelwidth)/vid.width; // prect.width = (vrect.width * vid.pixelwidth)/vid.width; // prect.y = (vrect.y * vid.pixelheight)/vid.height; // prect.height = (vrect.height * vid.pixelheight)/vid.height; if (sh_config.texture_non_power_of_two_pic) { cmapsize = prect.width > prect.height?prect.width:prect.height; if (cmapsize > 4096)//sh_config.texture_maxsize) cmapsize = 4096;//sh_config.texture_maxsize; } r_refdef.flags |= RDF_FISHEYE; vid.fbpwidth = vid.fbpheight = cmapsize; //FIXME: gl_max_size VectorCopy(r_refdef.vieworg, saveorg); VectorCopy(r_refdef.viewangles, saveang); saveang[2] = 0; osm = r_refdef.stereomethod; r_refdef.stereomethod = STEREO_OFF; VKBE_RT_Gen_Cube(rtc, cmapsize, r_clear.ival?true:false); vrect = r_refdef.vrect; //save off the old vrect r_refdef.vrect.width = (cmapsize * vid.fbvwidth) / vid.fbpwidth; r_refdef.vrect.height = (cmapsize * vid.fbvheight) / vid.fbpheight; r_refdef.vrect.x = 0; r_refdef.vrect.y = prect.y; ang[0][0] = -saveang[0]; ang[0][1] = -90; ang[0][2] = -saveang[0]; ang[1][0] = -saveang[0]; ang[1][1] = 90; ang[1][2] = saveang[0]; ang[5][0] = -saveang[0]*2; //in theory, we could use a geometry shader to duplicate the polygons to each face. //that would of course require that every bit of glsl had such a geometry shader. //it would at least reduce cpu load quite a bit. for (i = 0; i < 6; i++) { if (!(facemask & (1<face[i]); r_refdef.fov_x = 90; r_refdef.fov_y = 90; r_refdef.viewangles[0] = saveang[0]+ang[i][0]; r_refdef.viewangles[1] = saveang[1]+ang[i][1]; r_refdef.viewangles[2] = saveang[2]+ang[i][2]; VK_SetupViewPortProjection(true); /*if (!vk.rendertarg->depthcleared) { VkClearAttachment clr; VkClearRect rect; clr.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; clr.clearValue.depthStencil.depth = 1; clr.clearValue.depthStencil.stencil = 0; clr.colorAttachment = 1; rect.rect.offset.x = r_refdef.pxrect.x; rect.rect.offset.y = r_refdef.pxrect.y; rect.rect.extent.width = r_refdef.pxrect.width; rect.rect.extent.height = r_refdef.pxrect.height; rect.layerCount = 1; rect.baseArrayLayer = 0; vkCmdClearAttachments(vk.frame->cbuf, 1, &clr, 1, &rect); vk.rendertarg->depthcleared = true; }*/ VKBE_SelectEntity(&r_worldentity); R_SetFrustum (r_refdef.m_projection_std, r_refdef.m_view); RQ_BeginFrame(); if (!(r_refdef.flags & RDF_NOWORLDMODEL)) { if (cl.worldmodel) P_DrawParticles (); } Surf_DrawWorld(); RQ_RenderBatchClear(); vk.rendertarg->depthcleared = false; if (R2D_Flush) Con_Printf("no flush\n"); VKBE_RT_End(&rtc->face[i]); } r_refdef.vrect = vrect; r_refdef.pxrect = prect; VectorCopy(saveorg, r_refdef.vieworg); r_refdef.stereomethod = osm; VKBE_RT_Begin(fb); r_refdef.flipcull = 0; VK_Set2D(); shader->defaulttextures->reflectcube = &rtc->q_colour; // draw it through the shader if (r_projection.ival == PROJ_EQUIRECTANGULAR) { //note vr screenshots have requirements here R2D_Image(vrect.x, vrect.y, vrect.width, vrect.height, 0, 1, 1, 0, shader); } else if (r_projection.ival == PROJ_PANORAMA) { float saspect = .5; float taspect = vrect.height / vrect.width * ffov.value / 90;//(0.5 * vrect.width) / vrect.height; R2D_Image(vrect.x, vrect.y, vrect.width, vrect.height, -saspect, taspect, saspect, -taspect, shader); } else if (vrect.width > vrect.height) { float aspect = (0.5 * vrect.height) / vrect.width; R2D_Image(vrect.x, vrect.y, vrect.width, vrect.height, -0.5, aspect, 0.5, -aspect, shader); } else { float aspect = (0.5 * vrect.width) / vrect.height; R2D_Image(vrect.x, vrect.y, vrect.width, vrect.height, -aspect, 0.5, aspect, -0.5, shader); } if (R2D_Flush) R2D_Flush(); return true; } void VK_R_RenderView (void) { extern unsigned int r_viewcontents; struct vk_rendertarg *rt, *rtscreen = vk.rendertarg; extern cvar_t r_fxaa; extern cvar_t r_renderscale, r_postprocshader; float renderscale = r_renderscale.value; shader_t *custompostproc; if (r_norefresh.value || !vid.fbpwidth || !vid.fbpwidth) { VK_Set2D (); return; } VKBE_Set2D(false); Surf_SetupFrame(); //check if we can do underwater warp if (cls.protocol != CP_QUAKE2) //quake2 tells us directly { if (r_viewcontents & FTECONTENTS_FLUID) r_refdef.flags |= RDF_UNDERWATER; else r_refdef.flags &= ~RDF_UNDERWATER; } if (r_refdef.flags & RDF_UNDERWATER) { extern cvar_t r_projection; if (!r_waterwarp.value || r_projection.ival) r_refdef.flags &= ~RDF_UNDERWATER; //no warp at all else if (r_waterwarp.value > 0) r_refdef.flags |= RDF_WATERWARP; //try fullscreen warp instead if we can } if (!r_refdef.globalfog.density) { int fogtype = ((r_refdef.flags & RDF_UNDERWATER) && cl.fog[1].density)?1:0; CL_BlendFog(&r_refdef.globalfog, &cl.oldfog[fogtype], realtime, &cl.fog[fogtype]); r_refdef.globalfog.density /= 64; //FIXME } custompostproc = NULL; if (r_refdef.flags & RDF_NOWORLDMODEL) renderscale = 1; //with no worldmodel, this is probably meant to be transparent so make sure that there's no post-proc stuff messing up transparencies. else { if (*r_postprocshader.string) { custompostproc = R_RegisterCustom(r_postprocshader.string, SUF_NONE, NULL, NULL); if (custompostproc) r_refdef.flags |= RDF_CUSTOMPOSTPROC; } if (r_fxaa.ival) //overlays will have problems. r_refdef.flags |= RDF_ANTIALIAS; if (R_CanBloom()) r_refdef.flags |= RDF_BLOOM; } if (vk.multisamplebits != VK_SAMPLE_COUNT_1_BIT) //these are unsupported right now. r_refdef.flags &= ~(RDF_CUSTOMPOSTPROC|RDF_ANTIALIAS|RDF_BLOOM); // // figure out the viewport // { int x = r_refdef.vrect.x * vid.pixelwidth/(int)vid.width; int x2 = (r_refdef.vrect.x + r_refdef.vrect.width) * vid.pixelwidth/(int)vid.width; int y = (r_refdef.vrect.y) * vid.pixelheight/(int)vid.height; int y2 = ((int)(r_refdef.vrect.y + r_refdef.vrect.height)) * vid.pixelheight/(int)vid.height; // fudge around because of frac screen scale if (x > 0) x--; if (x2 < vid.pixelwidth) x2++; if (y < 0) y--; if (y2 < vid.pixelheight) y2++; r_refdef.pxrect.x = x; r_refdef.pxrect.y = y; r_refdef.pxrect.width = x2 - x; r_refdef.pxrect.height = y2 - y; r_refdef.pxrect.maxheight = vid.pixelheight; } if (renderscale != 1.0) { r_refdef.flags |= RDF_RENDERSCALE; if (renderscale < 0) renderscale *= -1; r_refdef.pxrect.width *= renderscale; r_refdef.pxrect.height *= renderscale; r_refdef.pxrect.maxheight = r_refdef.pxrect.height; } if (r_refdef.pxrect.width <= 0 || r_refdef.pxrect.height <= 0) return; //you're not allowed to do that, dude. //FIXME: VF_RT_* //FIXME: if we're meant to be using msaa, render the scene to an msaa target and then resolve. postproc_buf = 0; if (r_refdef.flags & (RDF_ALLPOSTPROC|RDF_RENDERSCALE)) { r_refdef.pxrect.x = 0; r_refdef.pxrect.y = 0; rt = &postproc[postproc_buf++%countof(postproc)]; VKBE_RT_Gen(rt, r_refdef.pxrect.width, r_refdef.pxrect.height, false, (r_renderscale.value < 0)?RT_IMAGEFLAGS-IF_LINEAR+IF_NEAREST:RT_IMAGEFLAGS); } else rt = rtscreen; if (!(r_refdef.flags & RDF_NOWORLDMODEL) && VK_R_RenderScene_Cubemap(rt)) { } else { VK_SetupViewPortProjection(false); if (rt != rtscreen) VKBE_RT_Begin(rt); else { VkViewport vp[1]; VkRect2D scissor[1]; vp[0].x = r_refdef.pxrect.x; vp[0].y = r_refdef.pxrect.y; vp[0].width = r_refdef.pxrect.width; vp[0].height = r_refdef.pxrect.height; vp[0].minDepth = 0.0; vp[0].maxDepth = 1.0; scissor[0].offset.x = r_refdef.pxrect.x; scissor[0].offset.y = r_refdef.pxrect.y; scissor[0].extent.width = r_refdef.pxrect.width; scissor[0].extent.height = r_refdef.pxrect.height; vkCmdSetViewport(vk.rendertarg->cbuf, 0, countof(vp), vp); vkCmdSetScissor(vk.rendertarg->cbuf, 0, countof(scissor), scissor); } if (!vk.rendertarg->depthcleared) { VkClearAttachment clr; VkClearRect rect; clr.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; clr.clearValue.depthStencil.depth = 1; clr.clearValue.depthStencil.stencil = 0; clr.colorAttachment = 1; rect.rect.offset.x = r_refdef.pxrect.x; rect.rect.offset.y = r_refdef.pxrect.y; rect.rect.extent.width = r_refdef.pxrect.width; rect.rect.extent.height = r_refdef.pxrect.height; rect.layerCount = 1; rect.baseArrayLayer = 0; vkCmdClearAttachments(vk.rendertarg->cbuf, 1, &clr, 1, &rect); vk.rendertarg->depthcleared = true; } VKBE_SelectEntity(&r_worldentity); R_SetFrustum (r_refdef.m_projection_std, r_refdef.m_view); RQ_BeginFrame(); if (!(r_refdef.flags & RDF_NOWORLDMODEL)) { if (cl.worldmodel) P_DrawParticles (); } Surf_DrawWorld(); RQ_RenderBatchClear(); vk.rendertarg->depthcleared = false; VK_Set2D (); if (rt != rtscreen) VKBE_RT_End(rt); } if (r_refdef.flags & RDF_ALLPOSTPROC) { if (!vk.scenepp_waterwarp) VK_Init_PostProc(); //FIXME: chain renderpasses as required. if (r_refdef.flags & RDF_WATERWARP) { r_refdef.flags &= ~RDF_WATERWARP; vk.sourcecolour = &rt->q_colour; if (r_refdef.flags & RDF_ALLPOSTPROC) { rt = &postproc[postproc_buf++]; VKBE_RT_Gen(rt, 320, 200, false, RT_IMAGEFLAGS); } else rt = rtscreen; if (rt != rtscreen) VKBE_RT_Begin(rt); R2D_Image(r_refdef.vrect.x, r_refdef.vrect.y, r_refdef.vrect.width, r_refdef.vrect.height, 0, 0, 1, 1, vk.scenepp_waterwarp); R2D_Flush(); if (rt != rtscreen) VKBE_RT_End(rt); } if (r_refdef.flags & RDF_CUSTOMPOSTPROC) { r_refdef.flags &= ~RDF_CUSTOMPOSTPROC; vk.sourcecolour = &rt->q_colour; if (r_refdef.flags & RDF_ALLPOSTPROC) { rt = &postproc[postproc_buf++]; VKBE_RT_Gen(rt, 320, 200, false, RT_IMAGEFLAGS); } else rt = rtscreen; if (rt != rtscreen) VKBE_RT_Begin(rt); R2D_Image(r_refdef.vrect.x, r_refdef.vrect.y, r_refdef.vrect.width, r_refdef.vrect.height, 0, 1, 1, 0, custompostproc); R2D_Flush(); if (rt != rtscreen) VKBE_RT_End(rt); } if (r_refdef.flags & RDF_ANTIALIAS) { r_refdef.flags &= ~RDF_ANTIALIAS; R2D_ImageColours(rt->width, rt->height, 1, 1); vk.sourcecolour = &rt->q_colour; if (r_refdef.flags & RDF_ALLPOSTPROC) { rt = &postproc[postproc_buf++]; VKBE_RT_Gen(rt, 320, 200, false, RT_IMAGEFLAGS); } else rt = rtscreen; if (rt != rtscreen) VKBE_RT_Begin(rt); R2D_Image(r_refdef.vrect.x, r_refdef.vrect.y, r_refdef.vrect.width, r_refdef.vrect.height, 0, 1, 1, 0, vk.scenepp_antialias); R2D_ImageColours(1, 1, 1, 1); R2D_Flush(); if (rt != rtscreen) VKBE_RT_End(rt); } if (r_refdef.flags & RDF_BLOOM) { VK_R_BloomBlend(&rt->q_colour, r_refdef.vrect.x, r_refdef.vrect.y, r_refdef.vrect.width, r_refdef.vrect.height); rt = rtscreen; } } else if (r_refdef.flags & RDF_RENDERSCALE) { if (!vk.scenepp_rescale) vk.scenepp_rescale = R_RegisterShader("fte_rescaler", 0, "{\n" "program default2d\n" "{\n" "map $sourcecolour\n" "}\n" "}\n" ); vk.sourcecolour = &rt->q_colour; rt = rtscreen; R2D_Image(r_refdef.vrect.x, r_refdef.vrect.y, r_refdef.vrect.width, r_refdef.vrect.height, 0, 0, 1, 1, vk.scenepp_rescale); R2D_Flush(); } vk.sourcecolour = r_nulltex; } typedef struct { uint32_t imageformat; uint32_t imagestride; uint32_t imagewidth; uint32_t imageheight; VkBuffer buffer; size_t memsize; VkDeviceMemory memory; void (*gotrgbdata) (void *rgbdata, intptr_t bytestride, size_t width, size_t height, enum uploadfmt fmt); } vkscreencapture_t; static void VKVID_CopiedRGBData (void*ctx) { //some fence got hit, we did our copy, data is now cpu-visible, cache-willing. vkscreencapture_t *capt = ctx; void *imgdata; VkAssert(vkMapMemory(vk.device, capt->memory, 0, capt->memsize, 0, &imgdata)); capt->gotrgbdata(imgdata, capt->imagestride, capt->imagewidth, capt->imageheight, capt->imageformat); vkUnmapMemory(vk.device, capt->memory); vkDestroyBuffer(vk.device, capt->buffer, vkallocationcb); vkFreeMemory(vk.device, capt->memory, vkallocationcb); } void VKVID_QueueGetRGBData (void (*gotrgbdata) (void *rgbdata, intptr_t bytestride, size_t width, size_t height, enum uploadfmt fmt)) { //should be half way through rendering vkscreencapture_t *capt; VkBufferImageCopy icpy; VkMemoryRequirements mem_reqs; VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; if (!VK_SCR_GrabBackBuffer()) return; if (!vk.frame->backbuf->colour.width || !vk.frame->backbuf->colour.height) return; //erm, some kind of error? capt = VK_AtFrameEnd(VKVID_CopiedRGBData, NULL, sizeof(*capt)); capt->gotrgbdata = gotrgbdata; //FIXME: vkCmdBlitImage the image to convert it from half-float or whatever to a format that our screenshot etc code can cope with. capt->imageformat = TF_BGRA32; capt->imagestride = vk.frame->backbuf->colour.width*4; //vulkan is top-down, so this should be positive. capt->imagewidth = vk.frame->backbuf->colour.width; capt->imageheight = vk.frame->backbuf->colour.height; bci.flags = 0; bci.size = capt->memsize = capt->imagewidth*capt->imageheight*4; bci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; bci.queueFamilyIndexCount = 0; bci.pQueueFamilyIndices = NULL; VkAssert(vkCreateBuffer(vk.device, &bci, vkallocationcb, &capt->buffer)); vkGetBufferMemoryRequirements(vk.device, capt->buffer, &mem_reqs); memAllocInfo.allocationSize = mem_reqs.size; memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_CACHED_BIT); if (memAllocInfo.memoryTypeIndex == ~0u) memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &capt->memory)); VkAssert(vkBindBufferMemory(vk.device, capt->buffer, capt->memory, 0)); set_image_layout(vk.rendertarg->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); icpy.bufferOffset = 0; icpy.bufferRowLength = 0; //packed icpy.bufferImageHeight = 0; //packed icpy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; icpy.imageSubresource.mipLevel = 0; icpy.imageSubresource.baseArrayLayer = 0; icpy.imageSubresource.layerCount = 1; icpy.imageOffset.x = 0; icpy.imageOffset.y = 0; icpy.imageOffset.z = 0; icpy.imageExtent.width = capt->imagewidth; icpy.imageExtent.height = capt->imageheight; icpy.imageExtent.depth = 1; vkCmdCopyImageToBuffer(vk.rendertarg->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, capt->buffer, 1, &icpy); set_image_layout(vk.rendertarg->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); } char *VKVID_GetRGBInfo (int *bytestride, int *truevidwidth, int *truevidheight, enum uploadfmt *fmt) { //in order to deal with various backbuffer formats (like half-float) etc, we play safe and blit the framebuffer to a safe format. //we then transfer that into a buffer that we can then directly read. //and then we allocate a C buffer that we then copy it into... //so yeah, 3 copies. life sucks. //blit requires support for VK_IMAGE_USAGE_TRANSFER_DST_BIT on our image, which means we need optimal, which means we can't directly map it, which means we need the buffer copy too. //this might be relaxed on mobile, but who really takes screenshots on mobiles anyway?!? anyway, video capture shouldn't be using this either way so top performance isn't a concern if (VK_SCR_GrabBackBuffer()) { VkImageLayout framebufferlayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;//vk.frame->backbuf->colour.layout; void *imgdata, *outdata; struct vk_fencework *fence = VK_FencedBegin(NULL, 0); VkImage tempimage; VkDeviceMemory tempmemory; VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; VkBuffer tempbuffer; VkDeviceMemory tempbufmemory; VkMemoryRequirements mem_reqs; VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; VkImageCreateInfo ici = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; //VkFormatProperties vkfmt; ici.flags = 0; ici.imageType = VK_IMAGE_TYPE_2D; /*vkGetPhysicalDeviceFormatProperties(vk.gpu, VK_FORMAT_B8G8R8_UNORM, &vkfmt); if ((vkfmt.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT) && (vkfmt.optimalTilingFeatures & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR)) { //if we can do BGR, then use it, because that's what most PC file formats use, like tga. //we don't really want alpha data anyway. if (vid.flags & VID_SRGB_FB) ici.format = VK_FORMAT_B8G8R8_SRGB; else ici.format = VK_FORMAT_B8G8R8_UNORM; } else*/ { //otherwise lets just get bgra data. if (vid.flags & VID_SRGB_FB) ici.format = VK_FORMAT_B8G8R8A8_SRGB; else ici.format = VK_FORMAT_B8G8R8A8_UNORM; } ici.extent.width = vid.pixelwidth; ici.extent.height = vid.pixelheight; ici.extent.depth = 1; ici.mipLevels = 1; ici.arrayLayers = 1; ici.samples = VK_SAMPLE_COUNT_1_BIT; ici.tiling = VK_IMAGE_TILING_OPTIMAL; ici.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT; ici.sharingMode = VK_SHARING_MODE_EXCLUSIVE; ici.queueFamilyIndexCount = 0; ici.pQueueFamilyIndices = NULL; ici.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; VkAssert(vkCreateImage(vk.device, &ici, vkallocationcb, &tempimage)); DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)tempimage, "VKVID_GetRGBInfo staging"); vkGetImageMemoryRequirements(vk.device, tempimage, &mem_reqs); memAllocInfo.allocationSize = mem_reqs.size; memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, 0); VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &tempmemory)); VkAssert(vkBindImageMemory(vk.device, tempimage, tempmemory, 0)); bci.flags = 0; bci.size = vid.pixelwidth*vid.pixelheight*4; bci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; bci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; bci.queueFamilyIndexCount = 0; bci.pQueueFamilyIndices = NULL; VkAssert(vkCreateBuffer(vk.device, &bci, vkallocationcb, &tempbuffer)); vkGetBufferMemoryRequirements(vk.device, tempbuffer, &mem_reqs); memAllocInfo.allocationSize = mem_reqs.size; memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_CACHED_BIT); if (memAllocInfo.memoryTypeIndex == ~0u) memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &tempbufmemory)); VkAssert(vkBindBufferMemory(vk.device, tempbuffer, tempbufmemory, 0)); set_image_layout(fence->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_ASPECT_COLOR_BIT, framebufferlayout, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); set_image_layout(fence->cbuf, tempimage, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); { VkImageBlit iblt; iblt.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; iblt.srcSubresource.mipLevel = 0; iblt.srcSubresource.baseArrayLayer = 0; iblt.srcSubresource.layerCount = 1; iblt.srcOffsets[0].x = 0; iblt.srcOffsets[0].y = 0; iblt.srcOffsets[0].z = 0; iblt.srcOffsets[1].x = vid.pixelwidth; iblt.srcOffsets[1].y = vid.pixelheight; iblt.srcOffsets[1].z = 1; iblt.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; iblt.dstSubresource.mipLevel = 0; iblt.dstSubresource.baseArrayLayer = 0; iblt.dstSubresource.layerCount = 1; iblt.dstOffsets[0].x = 0; iblt.dstOffsets[0].y = 0; iblt.dstOffsets[0].z = 0; iblt.dstOffsets[1].x = vid.pixelwidth; iblt.dstOffsets[1].y = vid.pixelheight; iblt.dstOffsets[1].z = 1; vkCmdBlitImage(fence->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, tempimage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &iblt, VK_FILTER_LINEAR); } set_image_layout(fence->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, framebufferlayout, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); set_image_layout(fence->cbuf, tempimage, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); { VkBufferImageCopy icpy; icpy.bufferOffset = 0; icpy.bufferRowLength = 0; //packed icpy.bufferImageHeight = 0; //packed icpy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; icpy.imageSubresource.mipLevel = 0; icpy.imageSubresource.baseArrayLayer = 0; icpy.imageSubresource.layerCount = 1; icpy.imageOffset.x = 0; icpy.imageOffset.y = 0; icpy.imageOffset.z = 0; icpy.imageExtent.width = ici.extent.width; icpy.imageExtent.height = ici.extent.height; icpy.imageExtent.depth = 1; vkCmdCopyImageToBuffer(fence->cbuf, tempimage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, tempbuffer, 1, &icpy); } VK_FencedSync(fence); outdata = BZ_Malloc(4*ici.extent.width*ici.extent.height); if (ici.format == VK_FORMAT_B8G8R8_SRGB || ici.format == VK_FORMAT_B8G8R8_UNORM) *fmt = PTI_BGR8; else if (ici.format == VK_FORMAT_R8G8B8_SRGB || ici.format == VK_FORMAT_R8G8B8_UNORM) *fmt = PTI_RGB8; else if (ici.format == VK_FORMAT_R8G8B8A8_SRGB || ici.format == VK_FORMAT_R8G8B8A8_UNORM) *fmt = PTI_RGBA8; else *fmt = PTI_BGRA8; *bytestride = ici.extent.width*4; *truevidwidth = ici.extent.width; *truevidheight = ici.extent.height; VkAssert(vkMapMemory(vk.device, tempbufmemory, 0, 4*ici.extent.width*ici.extent.height, 0, &imgdata)); memcpy(outdata, imgdata, 4*ici.extent.width*ici.extent.height); vkUnmapMemory(vk.device, tempbufmemory); vkDestroyImage(vk.device, tempimage, vkallocationcb); vkFreeMemory(vk.device, tempmemory, vkallocationcb); vkDestroyBuffer(vk.device, tempbuffer, vkallocationcb); vkFreeMemory(vk.device, tempbufmemory, vkallocationcb); return outdata; } return NULL; } static void VK_PaintScreen(void) { int uimenu; qboolean nohud; qboolean noworld; vid.fbvwidth = vid.width; vid.fbvheight = vid.height; vid.fbpwidth = vid.pixelwidth; vid.fbpheight = vid.pixelheight; r_refdef.pxrect.x = 0; r_refdef.pxrect.y = 0; r_refdef.pxrect.width = vid.fbpwidth; r_refdef.pxrect.height = vid.fbpheight; r_refdef.pxrect.maxheight = vid.pixelheight; vid.numpages = vk.backbuf_count + 1; R2D_Font_Changed(); VK_Set2D (); Shader_DoReload(); if (scr_disabled_for_loading) { extern float scr_disabled_time; if (Sys_DoubleTime() - scr_disabled_time > 60 || !Key_Dest_Has(~kdm_game)) { //FIXME: instead of reenabling the screen, we should just draw the relevent things skipping only the game. scr_disabled_for_loading = false; } else { // scr_drawloading = true; SCR_DrawLoading (true); // scr_drawloading = false; return; } } /* if (!scr_initialized || !con_initialized) { RSpeedEnd(RSPEED_TOTALREFRESH); return; // not initialized yet } */ #ifdef VM_UI uimenu = UI_MenuState(); #else uimenu = 0; #endif #ifdef TEXTEDITOR if (editormodal) { Editor_Draw(); V_UpdatePalette (false); #if defined(_WIN32) && defined(GLQUAKE) Media_RecordFrame(); #endif R2D_BrightenScreen(); if (key_dest_mask & kdm_console) Con_DrawConsole(vid.height/2, false); else Con_DrawConsole(0, false); // SCR_DrawCursor(); return; } #endif if (Media_ShowFilm()) { M_Draw(0); V_UpdatePalette (false); R2D_BrightenScreen(); #if defined(_WIN32) && defined(GLQUAKE) Media_RecordFrame(); #endif return; } // // do 3D refresh drawing, and then update the screen // SCR_SetUpToDrawConsole (); noworld = false; nohud = false; #ifdef VM_CG if (CG_Refresh()) nohud = true; else #endif #ifdef CSQC_DAT if (CSQC_DrawView()) nohud = true; else #endif { if (uimenu != 1) { if (r_worldentity.model && cls.state == ca_active) V_RenderView (nohud); else { noworld = true; } } } scr_con_forcedraw = false; if (noworld) { extern char levelshotname[]; //draw the levelshot or the conback fullscreen if (*levelshotname) R2D_ScalePic(0, 0, vid.width, vid.height, R2D_SafeCachePic (levelshotname)); else if (scr_con_current != vid.height) R2D_ConsoleBackground(0, vid.height, true); else scr_con_forcedraw = true; nohud = true; } SCR_DrawTwoDimensional(uimenu, nohud); V_UpdatePalette (false); R2D_BrightenScreen(); #if defined(_WIN32) && defined(GLQUAKE) Media_RecordFrame(); #endif RSpeedShow(); } VkCommandBuffer VK_AllocFrameCBuf(void) { struct vkframe *frame = vk.frame; if (frame->numcbufs == frame->maxcbufs) { VkCommandBufferAllocateInfo cbai = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; frame->maxcbufs++; frame->cbufs = BZ_Realloc(frame->cbufs, sizeof(*frame->cbufs)*frame->maxcbufs); cbai.commandPool = vk.cmdpool; cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; cbai.commandBufferCount = frame->maxcbufs - frame->numcbufs; VkAssert(vkAllocateCommandBuffers(vk.device, &cbai, frame->cbufs+frame->numcbufs)); } return frame->cbufs[frame->numcbufs++]; } qboolean VK_SCR_GrabBackBuffer(void) { VkSemaphore sem; RSpeedLocals(); if (vk.frame) //erk, we already have one... return true; RSpeedRemark(); VK_FencedCheck(); if (!vk.unusedframes) { struct vkframe *newframe = Z_Malloc(sizeof(*vk.frame)); VKBE_InitFramePools(newframe); newframe->next = vk.unusedframes; vk.unusedframes = newframe; } while (vk.aquirenext == vk.aquirelast) { //we're still waiting for the render thread to increment acquirelast. //shouldn't really happen, but can if the gpu is slow. Sys_Sleep(0); //o.O #ifdef _WIN32 Sys_SendKeyEvents(); #endif } if (vk.acquirefences[vk.aquirenext%ACQUIRELIMIT] != VK_NULL_HANDLE) { //wait for the queued acquire to actually finish if (vk_busywait.ival) { //busy wait, to try to get the highest fps possible for (;;) { switch(vkGetFenceStatus(vk.device, vk.acquirefences[vk.aquirenext%ACQUIRELIMIT])) { case VK_SUCCESS: break; //hurrah case VK_NOT_READY: continue; //keep going until its actually signaled. submission thread is probably just slow. case VK_TIMEOUT: continue; //erk? this isn't a documented result here. case VK_ERROR_DEVICE_LOST: Sys_Error("Vulkan device lost"); default: return false; } break; } } else { //friendly wait VkResult err = vkWaitForFences(vk.device, 1, &vk.acquirefences[vk.aquirenext%ACQUIRELIMIT], VK_FALSE, UINT64_MAX); if (err) { if (err == VK_ERROR_DEVICE_LOST) Sys_Error("Vulkan device lost"); return false; } } VkAssert(vkResetFences(vk.device, 1, &vk.acquirefences[vk.aquirenext%ACQUIRELIMIT])); } vk.bufferidx = vk.acquirebufferidx[vk.aquirenext%ACQUIRELIMIT]; sem = vk.acquiresemaphores[vk.aquirenext%ACQUIRELIMIT]; vk.aquirenext++; //grab the first unused Sys_LockConditional(vk.submitcondition); vk.frame = vk.unusedframes; vk.unusedframes = vk.frame->next; vk.frame->next = NULL; Sys_UnlockConditional(vk.submitcondition); VkAssert(vkResetFences(vk.device, 1, &vk.frame->finishedfence)); vk.frame->backbuf = &vk.backbufs[vk.bufferidx]; vk.rendertarg = vk.frame->backbuf; vk.frame->numcbufs = 0; vk.rendertarg->cbuf = VK_AllocFrameCBuf(); vk.frame->acquiresemaphore = sem; RSpeedEnd(RSPEED_SETUP); { VkCommandBufferBeginInfo begininf = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; VkCommandBufferInheritanceInfo inh = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO}; begininf.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; begininf.pInheritanceInfo = &inh; inh.renderPass = VK_NULL_HANDLE; //unused inh.subpass = 0; //unused inh.framebuffer = VK_NULL_HANDLE; //unused inh.occlusionQueryEnable = VK_FALSE; inh.queryFlags = 0; inh.pipelineStatistics = 0; vkBeginCommandBuffer(vk.rendertarg->cbuf, &begininf); } VKBE_RestartFrame(); // VK_DebugFramerate(); // vkCmdWriteTimestamp(vk.frame->cbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, querypool, vk.bufferidx*2+0); if (vk.multisamplebits == VK_SAMPLE_COUNT_1_BIT) { VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; imgbarrier.pNext = NULL; imgbarrier.srcAccessMask = 0;//VK_ACCESS_MEMORY_READ_BIT; imgbarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; imgbarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;//vk.rendertarg->colour.layout; //'Alternately, oldLayout can be VK_IMAGE_LAYOUT_UNDEFINED, if the image's contents need not be preserved.' imgbarrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; imgbarrier.image = vk.frame->backbuf->colour.image; imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = 1; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = 1; imgbarrier.srcQueueFamilyIndex = vk.queuefam[1]; imgbarrier.dstQueueFamilyIndex = vk.queuefam[0]; if (vk.frame->backbuf->firstuse) { imgbarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk.frame->backbuf->firstuse = false; } vk.rendertarg->colour.layout = imgbarrier.newLayout; vkCmdPipelineBarrier(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier); } { VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; imgbarrier.pNext = NULL; imgbarrier.srcAccessMask = 0; imgbarrier.dstAccessMask = 0;//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; imgbarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; imgbarrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; imgbarrier.image = vk.frame->backbuf->depth.image; imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = 1; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = 1; imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vkCmdPipelineBarrier(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier); } { VkClearValue clearvalues[3]; extern cvar_t r_clear; VkRenderPassBeginInfo rpbi = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO}; //attachments are: screen[1], depth[msbits], (screen[msbits]) clearvalues[0].color.float32[0] = !!(r_clear.ival & 1); clearvalues[0].color.float32[1] = !!(r_clear.ival & 2); clearvalues[0].color.float32[2] = !!(r_clear.ival & 4); clearvalues[0].color.float32[3] = 1; clearvalues[1].depthStencil.depth = 1.0; clearvalues[1].depthStencil.stencil = 0; if (vk.multisamplebits != VK_SAMPLE_COUNT_1_BIT) { clearvalues[2].color.float32[0] = !!(r_clear.ival & 1); clearvalues[2].color.float32[1] = !!(r_clear.ival & 2); clearvalues[2].color.float32[2] = !!(r_clear.ival & 4); clearvalues[2].color.float32[3] = 1; rpbi.clearValueCount = 3; } else rpbi.clearValueCount = 2; if (r_clear.ival) rpbi.renderPass = vk.renderpass[2]; else rpbi.renderPass = vk.renderpass[1]; //may still clear rpbi.framebuffer = vk.frame->backbuf->framebuffer; rpbi.renderArea.offset.x = 0; rpbi.renderArea.offset.y = 0; rpbi.renderArea.extent.width = vid.pixelwidth; rpbi.renderArea.extent.height = vid.pixelheight; rpbi.pClearValues = clearvalues; vkCmdBeginRenderPass(vk.rendertarg->cbuf, &rpbi, VK_SUBPASS_CONTENTS_INLINE); vk.frame->backbuf->width = vid.pixelwidth; vk.frame->backbuf->height = vid.pixelheight; rpbi.clearValueCount = 0; rpbi.pClearValues = NULL; rpbi.renderPass = vk.renderpass[0]; vk.rendertarg->restartinfo = rpbi; vk.rendertarg->depthcleared = true; } return true; } struct vk_presented { struct vk_fencework fw; struct vkframe *frame; }; void VK_Presented(void *fw) { struct vk_presented *pres = fw; struct vkframe *frame = pres->frame; pres->fw.fence = VK_NULL_HANDLE; //don't allow that to be freed. while(frame->frameendjobs) { struct vk_frameend *job = frame->frameendjobs; frame->frameendjobs = job->next; job->FrameEnded(job+1); Z_Free(job); } frame->next = vk.unusedframes; vk.unusedframes = frame; } #if 0 void VK_DebugFramerate(void) { static double lastupdatetime; static double lastsystemtime; double t; extern int fps_count; float lastfps; float frametime; t = Sys_DoubleTime(); if ((t - lastupdatetime) >= 1.0) { lastfps = fps_count/(t - lastupdatetime); fps_count = 0; lastupdatetime = t; OutputDebugStringA(va("%g fps\n", lastfps)); } frametime = t - lastsystemtime; lastsystemtime = t; } #endif qboolean VK_SCR_UpdateScreen (void) { VkImageLayout fblayout; VK_FencedCheck(); //a few cvars need some extra work if they're changed if ((vk.allowsubmissionthread && vk_submissionthread.modified) || vid_vsync.modified || vk_waitfence.modified || vid_triplebuffer.modified || vid_srgb.modified || vid_multisample.modified) vk.neednewswapchain = true; if (vk.devicelost) { //vkQueueSubmit returning vk_error_device_lost means we give up and try resetting everything. //if someone's installing new drivers then wait a little time before reloading everything, in the hope that any other dependant files got copied. or something. //fixme: don't allow this to be spammed... Sys_Sleep(5); Con_Printf("Device was lost. Restarting video\n"); Cmd_ExecuteString("vid_restart", RESTRICT_LOCAL); return false; } if (vk.neednewswapchain && !vk.frame) { #ifdef MULTITHREAD //kill the thread if (vk.submitthread) { Sys_LockConditional(vk.submitcondition); //annoying, but required for it to be reliable with respect to other things. Sys_ConditionSignal(vk.submitcondition); Sys_UnlockConditional(vk.submitcondition); Sys_WaitOnThread(vk.submitthread); vk.submitthread = NULL; } #endif //make sure any work is actually done BEFORE the swapchain gets destroyed while (vk.work) { Sys_LockConditional(vk.submitcondition); VK_Submit_DoWork(); Sys_UnlockConditional(vk.submitcondition); } if (vk.dopresent) vk.dopresent(NULL); vkDeviceWaitIdle(vk.device); VK_CreateSwapChain(); vk.neednewswapchain = false; #ifdef MULTITHREAD if (vk.allowsubmissionthread && (vk_submissionthread.ival || !*vk_submissionthread.string)) { vk.submitthread = Sys_CreateThread("vksubmission", VK_Submit_Thread, NULL, THREADP_HIGHEST, 0); } #endif } if (!VK_SCR_GrabBackBuffer()) return false; VKBE_Set2D(true); VKBE_SelectDLight(NULL, vec3_origin, NULL, 0); VK_PaintScreen(); if (R2D_Flush) R2D_Flush(); vkCmdEndRenderPass(vk.rendertarg->cbuf); fblayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; /*if (0) { vkscreencapture_t *capt = VK_AtFrameEnd(atframeend, sizeof(vkscreencapture_t)); VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; VkBufferImageCopy region; imgbarrier.pNext = NULL; imgbarrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; imgbarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; imgbarrier.oldLayout = fblayout; imgbarrier.newLayout = fblayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; imgbarrier.image = vk.frame->backbuf->colour.image; imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = 1; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = 1; imgbarrier.srcQueueFamilyIndex = vk.queuefam[0]; imgbarrier.dstQueueFamilyIndex = vk.queuefam[0]; vkCmdPipelineBarrier(vk.frame->cbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier); region.bufferOffset = 0; region.bufferRowLength = 0; //tightly packed region.bufferImageHeight = 0; //tightly packed region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; region.imageSubresource.mipLevel = 0; region.imageSubresource.baseArrayLayer = 0; region.imageSubresource.layerCount = 1; region.imageOffset.x = 0; region.imageOffset.y = 0; region.imageOffset.z = 0; region.imageExtent.width = capt->imagewidth = vk.frame->backbuf->colour.width; region.imageExtent.height = capt->imageheight = vk.frame->backbuf->colour.height; region.imageExtent.depth = 1; vkCmdCopyImageToBuffer(vk.frame->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1, ®ion); }*/ if (vk.multisamplebits == VK_SAMPLE_COUNT_1_BIT) { VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; imgbarrier.pNext = NULL; imgbarrier.srcAccessMask = /*VK_ACCESS_TRANSFER_READ_BIT|*/VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; imgbarrier.dstAccessMask = 0; imgbarrier.oldLayout = fblayout; imgbarrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; imgbarrier.image = vk.frame->backbuf->colour.image; imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imgbarrier.subresourceRange.baseMipLevel = 0; imgbarrier.subresourceRange.levelCount = 1; imgbarrier.subresourceRange.baseArrayLayer = 0; imgbarrier.subresourceRange.layerCount = 1; imgbarrier.srcQueueFamilyIndex = vk.queuefam[0]; imgbarrier.dstQueueFamilyIndex = vk.queuefam[1]; vkCmdPipelineBarrier(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier); vk.rendertarg->colour.layout = imgbarrier.newLayout; } // vkCmdWriteTimestamp(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, querypool, vk.bufferidx*2+1); vkEndCommandBuffer(vk.rendertarg->cbuf); VKBE_FlushDynamicBuffers(); { struct vk_presented *fw = Z_Malloc(sizeof(*fw)); fw->fw.Passed = VK_Presented; fw->fw.fence = vk.frame->finishedfence; fw->frame = vk.frame; //hand over any post-frame jobs to the frame in question. vk.frame->frameendjobs = vk.frameendjobs; vk.frameendjobs = NULL; VK_Submit_Work(vk.rendertarg->cbuf, vk.frame->acquiresemaphore, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk.frame->backbuf->presentsemaphore, vk.frame->finishedfence, vk.frame, &fw->fw); } //now would be a good time to do any compute work or lightmap updates... vk.frame = NULL; VK_FencedCheck(); VID_SwapBuffers(); #ifdef TEXTEDITOR if (editormodal) { //FIXME VK_SCR_GrabBackBuffer(); } #endif return true; } void VKBE_RenderToTextureUpdate2d(qboolean destchanged) { } static void VK_DestroyRenderPass(void) { int i; for (i = 0; i < countof(vk.renderpass); i++) { if (vk.renderpass[i] != VK_NULL_HANDLE) { vkDestroyRenderPass(vk.device, vk.renderpass[i], vkallocationcb); vk.renderpass[i] = VK_NULL_HANDLE; } } } static void VK_CreateRenderPass(void) { int pass; int numattachments; static VkAttachmentReference color_reference; static VkAttachmentReference depth_reference; static VkAttachmentReference resolve_reference; static VkAttachmentDescription attachments[3] = {{0}}; static VkSubpassDescription subpass = {0}; static VkRenderPassCreateInfo rp_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO}; //two render passes are compatible for piplines when they match exactly except for: //initial and final layouts in attachment descriptions. //load and store operations in attachment descriptions. //image layouts in attachment references. for (pass = 0; pass < 3; pass++) { if (vk.renderpass[pass] != VK_NULL_HANDLE) continue; numattachments = 0; if (vk.multisamplebits != VK_SAMPLE_COUNT_1_BIT) { resolve_reference.attachment = numattachments++; depth_reference.attachment = numattachments++; color_reference.attachment = numattachments++; } else { color_reference.attachment = numattachments++; depth_reference.attachment = numattachments++; resolve_reference.attachment = ~(uint32_t)0; } color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; resolve_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; attachments[color_reference.attachment].format = vk.backbufformat; attachments[color_reference.attachment].samples = vk.multisamplebits; // attachments[color_reference.attachment].loadOp = pass?VK_ATTACHMENT_LOAD_OP_LOAD:VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[color_reference.attachment].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[color_reference.attachment].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[color_reference.attachment].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[color_reference.attachment].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; attachments[color_reference.attachment].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; attachments[depth_reference.attachment].format = vk.depthformat; attachments[depth_reference.attachment].samples = vk.multisamplebits; // attachments[depth_reference.attachment].loadOp = pass?VK_ATTACHMENT_LOAD_OP_LOAD:VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[depth_reference.attachment].storeOp = VK_ATTACHMENT_STORE_OP_STORE;//VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[depth_reference.attachment].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[depth_reference.attachment].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[depth_reference.attachment].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachments[depth_reference.attachment].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; if (resolve_reference.attachment != ~(uint32_t)0) { attachments[resolve_reference.attachment].format = vk.backbufformat; attachments[resolve_reference.attachment].samples = VK_SAMPLE_COUNT_1_BIT; attachments[resolve_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[resolve_reference.attachment].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[resolve_reference.attachment].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[resolve_reference.attachment].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[resolve_reference.attachment].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; attachments[resolve_reference.attachment].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; } subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.flags = 0; subpass.inputAttachmentCount = 0; subpass.pInputAttachments = NULL; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &color_reference; subpass.pResolveAttachments = (resolve_reference.attachment != ~(uint32_t)0)?&resolve_reference:NULL; subpass.pDepthStencilAttachment = &depth_reference; subpass.preserveAttachmentCount = 0; subpass.pPreserveAttachments = NULL; rp_info.attachmentCount = numattachments; rp_info.pAttachments = attachments; rp_info.subpassCount = 1; rp_info.pSubpasses = &subpass; rp_info.dependencyCount = 0; rp_info.pDependencies = NULL; if (pass == 0) { //nothing cleared, both are just re-loaded. attachments[color_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachments[depth_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } else if (pass == 1) { //depth cleared, colour is whatever. attachments[color_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[depth_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; } else { //both cleared attachments[color_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; attachments[depth_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; } VkAssert(vkCreateRenderPass(vk.device, &rp_info, vkallocationcb, &vk.renderpass[pass])); } } void VK_DoPresent(struct vkframe *theframe) { VkResult err; uint32_t framenum; VkPresentInfoKHR presinfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; if (!theframe) return; //used to ensure that the queue is flushed at shutdown framenum = theframe->backbuf - vk.backbufs; presinfo.waitSemaphoreCount = 1; presinfo.pWaitSemaphores = &theframe->backbuf->presentsemaphore; presinfo.swapchainCount = 1; presinfo.pSwapchains = &vk.swapchain; presinfo.pImageIndices = &framenum; { RSpeedMark(); err = vkQueuePresentKHR(vk.queue_present, &presinfo); RSpeedEnd(RSPEED_PRESENT); } { RSpeedMark(); if (err) { if (err == VK_SUBOPTIMAL_KHR) Con_DPrintf("vkQueuePresentKHR: VK_SUBOPTIMAL_KHR\n"); else if (err == VK_ERROR_OUT_OF_DATE_KHR) Con_DPrintf("vkQueuePresentKHR: VK_ERROR_OUT_OF_DATE_KHR\n"); else Con_Printf("ERROR: vkQueuePresentKHR: %i\n", err); vk.neednewswapchain = true; } else { err = vkAcquireNextImageKHR(vk.device, vk.swapchain, 0, vk.acquiresemaphores[vk.aquirelast%ACQUIRELIMIT], vk.acquirefences[vk.aquirelast%ACQUIRELIMIT], &vk.acquirebufferidx[vk.aquirelast%ACQUIRELIMIT]); if (err) { Con_Printf("ERROR: vkAcquireNextImageKHR: %i\n", err); vk.neednewswapchain = true; vk.devicelost |= (err == VK_ERROR_DEVICE_LOST); } vk.aquirelast++; } RSpeedEnd(RSPEED_ACQUIRE); } } static void VK_Submit_DoWork(void) { VkCommandBuffer cbuf[64]; VkSemaphore wsem[64]; VkPipelineStageFlags wsemstageflags[64]; VkSemaphore ssem[64]; VkQueue subqueue = NULL; VkSubmitInfo subinfo[64]; unsigned int subcount = 0; struct vkwork_s *work; struct vkframe *present = NULL; VkFence waitfence = VK_NULL_HANDLE; VkResult err; struct vk_fencework *fencedwork = NULL; qboolean errored = false; while(vk.work && !present && !waitfence && !fencedwork && subcount < countof(subinfo)) { work = vk.work; if (subcount && subqueue != work->queue) break; subinfo[subcount].sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; subinfo[subcount].pNext = NULL; subinfo[subcount].waitSemaphoreCount = work->semwait?1:0; subinfo[subcount].pWaitSemaphores = &wsem[subcount]; wsem[subcount] = work->semwait; subinfo[subcount].pWaitDstStageMask = &wsemstageflags[subcount]; wsemstageflags[subcount] = work->semwaitstagemask; subinfo[subcount].commandBufferCount = work->cmdbuf?1:0; subinfo[subcount].pCommandBuffers = &cbuf[subcount]; cbuf[subcount] = work->cmdbuf; subinfo[subcount].signalSemaphoreCount = work->semsignal?1:0; subinfo[subcount].pSignalSemaphores = &ssem[subcount]; ssem[subcount] = work->semsignal; waitfence = work->fencesignal; fencedwork = work->fencedwork; subqueue = work->queue; subcount++; present = work->present; vk.work = work->next; Z_Free(work); } Sys_UnlockConditional(vk.submitcondition); //don't block people giving us work while we're occupied if (subcount || waitfence) { RSpeedMark(); err = vkQueueSubmit(subqueue, subcount, subinfo, waitfence); if (err) { Con_Printf("ERROR: vkQueueSubmit: %i\n", err); errored = vk.neednewswapchain = true; vk.devicelost |= (err==VK_ERROR_DEVICE_LOST); } RSpeedEnd(RSPEED_SUBMIT); } if (present && !errored) { vk.dopresent(present); } Sys_LockConditional(vk.submitcondition); if (fencedwork) { //this is used for loading and cleaning up things after the gpu has consumed it. if (vk.fencework_last) { vk.fencework_last->next = fencedwork; vk.fencework_last = fencedwork; } else vk.fencework_last = vk.fencework = fencedwork; } } #ifdef MULTITHREAD //oh look. a thread. //nvidia's drivers seem to like doing a lot of blocking in queuesubmit and queuepresent(despite the whole QUEUE thing). //so thread this work so the main thread doesn't have to block so much. int VK_Submit_Thread(void *arg) { Sys_LockConditional(vk.submitcondition); while(!vk.neednewswapchain) { if (!vk.work) Sys_ConditionWait(vk.submitcondition); VK_Submit_DoWork(); } Sys_UnlockConditional(vk.submitcondition); return true; } #endif void VK_Submit_Work(VkCommandBuffer cmdbuf, VkSemaphore semwait, VkPipelineStageFlags semwaitstagemask, VkSemaphore semsignal, VkFence fencesignal, struct vkframe *presentframe, struct vk_fencework *fencedwork) { struct vkwork_s *work = Z_Malloc(sizeof(*work)); struct vkwork_s **link; work->queue = vk.queue_render; work->cmdbuf = cmdbuf; work->semwait = semwait; work->semwaitstagemask = semwaitstagemask; work->semsignal = semsignal; work->fencesignal = fencesignal; work->present = presentframe; work->fencedwork = fencedwork; Sys_LockConditional(vk.submitcondition); #ifdef MULTITHREAD if (vk.neednewswapchain && vk.submitthread) { //if we're trying to kill the submission thread, don't post work to it - instead wait for it to die cleanly then do it ourselves. Sys_ConditionSignal(vk.submitcondition); Sys_UnlockConditional(vk.submitcondition); Sys_WaitOnThread(vk.submitthread); vk.submitthread = NULL; Sys_LockConditional(vk.submitcondition); //annoying, but required for it to be reliable with respect to other things. } #endif //add it on the end in a lazy way. for (link = &vk.work; *link; link = &(*link)->next) ; *link = work; #ifdef MULTITHREAD if (vk.submitthread) Sys_ConditionSignal(vk.submitcondition); else #endif VK_Submit_DoWork(); Sys_UnlockConditional(vk.submitcondition); } void VK_Submit_Sync(void) { Sys_LockConditional(vk.submitcondition); //FIXME: vkDeviceWaitIdle(vk.device); //just in case Sys_UnlockConditional(vk.submitcondition); } void VK_CheckTextureFormats(void) { struct { unsigned int pti; VkFormat vulkan; unsigned int needextra; } texfmt[] = { {PTI_RGBA8, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_RGBX8, VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BGRA8, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BGRX8, VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_RGB8, VK_FORMAT_R8G8B8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BGR8, VK_FORMAT_B8G8R8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_RGBA8_SRGB, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT|VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_RGBX8_SRGB, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT|VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_BGRA8_SRGB, VK_FORMAT_B8G8R8A8_SRGB, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT|VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_BGRX8_SRGB, VK_FORMAT_B8G8R8A8_SRGB, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT|VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_E5BGR9, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT|VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_A2BGR10, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT|VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_RGB565, VK_FORMAT_R5G6B5_UNORM_PACK16, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_RGBA4444, VK_FORMAT_R4G4B4A4_UNORM_PACK16, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, // {PTI_ARGB4444, VK_FORMAT_A4R4G4B4_UNORM_PACK16, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_RGBA5551, VK_FORMAT_R5G5B5A1_UNORM_PACK16, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ARGB1555, VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_RGBA16F, VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT|VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT|VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT}, {PTI_RGBA32F, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT|VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT|VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT}, {PTI_R8, VK_FORMAT_R8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_RG8, VK_FORMAT_R8G8_UNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_R8_SNORM, VK_FORMAT_R8_SNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_RG8_SNORM, VK_FORMAT_R8G8_SNORM, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT}, {PTI_DEPTH16, VK_FORMAT_D16_UNORM, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT}, {PTI_DEPTH24, VK_FORMAT_X8_D24_UNORM_PACK32, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT}, {PTI_DEPTH32, VK_FORMAT_D32_SFLOAT, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT}, {PTI_DEPTH24_8, VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT}, {PTI_BC1_RGB, VK_FORMAT_BC1_RGB_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC1_RGBA, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC2_RGBA, VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC3_RGBA, VK_FORMAT_BC3_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC1_RGB_SRGB, VK_FORMAT_BC1_RGB_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC1_RGBA_SRGB, VK_FORMAT_BC1_RGBA_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC2_RGBA_SRGB, VK_FORMAT_BC2_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC3_RGBA_SRGB, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC4_R8, VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC4_R8_SNORM, VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC5_RG8, VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC5_RG8_SNORM, VK_FORMAT_BC5_SNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC6_RGB_UFLOAT, VK_FORMAT_BC6H_UFLOAT_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC6_RGB_SFLOAT, VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC7_RGBA, VK_FORMAT_BC7_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_BC7_RGBA_SRGB, VK_FORMAT_BC7_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ETC1_RGB8, VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, //vulkan doesn't support etc1 (but that's okay, because etc2 is a superset). {PTI_ETC2_RGB8, VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ETC2_RGB8A1, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ETC2_RGB8A8, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ETC2_RGB8_SRGB, VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ETC2_RGB8A1_SRGB, VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ETC2_RGB8A8_SRGB, VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_EAC_R11, VK_FORMAT_EAC_R11_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_EAC_R11_SNORM, VK_FORMAT_EAC_R11_SNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_EAC_RG11, VK_FORMAT_EAC_R11G11_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_EAC_RG11_SNORM, VK_FORMAT_EAC_R11G11_SNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_4X4, VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_4X4_SRGB, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_5X4, VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_5X4_SRGB, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_5X5, VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_5X5_SRGB, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_6X5, VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_6X5_SRGB, VK_FORMAT_ASTC_6x5_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_6X6, VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_6X6_SRGB, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_8X5, VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_8X5_SRGB, VK_FORMAT_ASTC_8x5_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_8X6, VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_8X6_SRGB, VK_FORMAT_ASTC_8x6_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_8X8, VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_8X8_SRGB, VK_FORMAT_ASTC_8x8_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X5, VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X5_SRGB, VK_FORMAT_ASTC_10x5_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X6, VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X6_SRGB, VK_FORMAT_ASTC_10x6_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X8, VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X8_SRGB, VK_FORMAT_ASTC_10x8_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X10, VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_10X10_SRGB, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_12X10, VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_12X10_SRGB, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_12X12, VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, {PTI_ASTC_12X12_SRGB, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT}, }; unsigned int i; VkPhysicalDeviceProperties props; vkGetPhysicalDeviceProperties(vk.gpu, &props); vk.limits = props.limits; sh_config.texture2d_maxsize = props.limits.maxImageDimension2D; sh_config.texturecube_maxsize = props.limits.maxImageDimensionCube; for (i = 0; i < countof(texfmt); i++) { unsigned int need = /*VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT |*/ texfmt[i].needextra; VkFormatProperties fmt; vkGetPhysicalDeviceFormatProperties(vk.gpu, texfmt[i].vulkan, &fmt); if ((fmt.optimalTilingFeatures & need) == need) sh_config.texfmt[texfmt[i].pti] = true; } } //initialise the vulkan instance, context, device, etc. qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*createSurface)(void), void (*dopresent)(struct vkframe *theframe)) { VkQueueFamilyProperties *queueprops; VkResult err; VkApplicationInfo app; VkInstanceCreateInfo inst_info; int gpuidx = 0; const char *extensions[8]; uint32_t extensions_count = 0; //device extensions that want to enable //initialised in reverse order, so superseeded should name later extensions. struct { qboolean *flag; const char *name; cvar_t *var; qboolean def; qboolean *superseeded; //if this is set then the extension will not be enabled after all const char *warningtext; //printed if the extension is requested but not supported by the device qboolean supported; } knowndevexts[] = { {&vk.khr_swapchain, VK_KHR_SWAPCHAIN_EXTENSION_NAME, NULL, true, NULL, " Nothing will be drawn!"}, {&vk.nv_glsl_shader, VK_NV_GLSL_SHADER_EXTENSION_NAME, &vk_nv_glsl_shader, false, NULL, " Direct use of glsl is not supported."}, {&vk.khr_get_memory_requirements2, VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,&vk_khr_get_memory_requirements2,true, NULL, NULL}, {&vk.khr_dedicated_allocation, VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, &vk_khr_dedicated_allocation, true, NULL, NULL}, {&vk.khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, &vk_khr_push_descriptor, true, NULL, NULL}, {&vk.amd_rasterization_order, VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME, &vk_amd_rasterization_order, false, NULL, NULL}, }; size_t e; for (e = 0; e < countof(knowndevexts); e++) *knowndevexts[e].flag = false; #ifdef MULTITHREAD vk.allowsubmissionthread = true; #endif vk.neednewswapchain = true; vk.triplebuffer = info->triplebuffer; vk.vsync = info->wait; vk.dopresent = dopresent?dopresent:VK_DoPresent; vk.max_anistophy_limit = 1.0; memset(&sh_config, 0, sizeof(sh_config)); //get second set of pointers... (instance-level) #ifdef VK_NO_PROTOTYPES if (!vkGetInstanceProcAddr) { Con_Printf("vkGetInstanceProcAddr is null\n"); return false; } #define VKFunc(n) vk##n = (PFN_vk##n)vkGetInstanceProcAddr(VK_NULL_HANDLE, "vk"#n); VKInstFuncs #undef VKFunc #endif //try and enable some instance extensions... { qboolean surfext = false; uint32_t count, i, j; VkExtensionProperties *ext; #ifdef VK_EXT_debug_utils qboolean havedebugutils = false; #endif #ifdef VK_EXT_debug_report qboolean havedebugreport = false; #endif vkEnumerateInstanceExtensionProperties(NULL, &count, NULL); ext = malloc(sizeof(*ext)*count); vkEnumerateInstanceExtensionProperties(NULL, &count, ext); for (i = 0; i < count && extensions_count < countof(extensions); i++) { #ifdef VK_EXT_debug_utils if (!strcmp(ext[i].extensionName, VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) havedebugutils = true; #endif #ifdef VK_EXT_debug_report if (!strcmp(ext[i].extensionName, VK_EXT_DEBUG_REPORT_EXTENSION_NAME)) havedebugreport = true; #endif if (!strcmp(ext[i].extensionName, VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME)) extensions[extensions_count++] = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME; else if (sysextnames && !strcmp(ext[i].extensionName, VK_KHR_SURFACE_EXTENSION_NAME)) { extensions[extensions_count++] = VK_KHR_SURFACE_EXTENSION_NAME; surfext = true; } else if (sysextnames) { for (j = 0; sysextnames[j]; j++) { if (!strcmp(ext[i].extensionName, sysextnames[j])) { extensions[extensions_count++] = sysextnames[j]; vk.khr_swapchain = true; } } } } free(ext); if (!vk_debug.ival) ; #ifdef VK_EXT_debug_utils else if (havedebugutils) extensions[extensions_count++] = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; #endif #ifdef VK_EXT_debug_report else if (havedebugreport) extensions[extensions_count++] = VK_EXT_DEBUG_REPORT_EXTENSION_NAME; #endif if (sysextnames && (!vk.khr_swapchain || !surfext)) { Con_Printf("Vulkan instance lacks driver support for %s\n", sysextnames[0]); return false; } } #define ENGINEVERSION 1 memset(&app, 0, sizeof(app)); app.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; app.pNext = NULL; app.pApplicationName = NULL; app.applicationVersion = 0; app.pEngineName = FULLENGINENAME; app.engineVersion = ENGINEVERSION; app.apiVersion = VK_MAKE_VERSION(1, 0, 2); memset(&inst_info, 0, sizeof(inst_info)); inst_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; inst_info.pApplicationInfo = &app; inst_info.enabledLayerCount = vklayercount; inst_info.ppEnabledLayerNames = vklayerlist; inst_info.enabledExtensionCount = extensions_count; inst_info.ppEnabledExtensionNames = extensions; err = vkCreateInstance(&inst_info, vkallocationcb, &vk.instance); switch(err) { case VK_ERROR_INCOMPATIBLE_DRIVER: Con_Printf("VK_ERROR_INCOMPATIBLE_DRIVER: please install an appropriate vulkan driver\n"); return false; case VK_ERROR_EXTENSION_NOT_PRESENT: Con_Printf("VK_ERROR_EXTENSION_NOT_PRESENT: something on a system level is probably misconfigured\n"); return false; case VK_ERROR_LAYER_NOT_PRESENT: Con_Printf("VK_ERROR_LAYER_NOT_PRESENT: requested layer is not known/usable\n"); return false; default: Con_Printf("Unknown vulkan instance creation error: %x\n", err); return false; case VK_SUCCESS: break; } //third set of functions... #ifdef VK_NO_PROTOTYPES vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr(vk.instance, "vkGetInstanceProcAddr"); #define VKFunc(n) vk##n = (PFN_vk##n)vkGetInstanceProcAddr(vk.instance, "vk"#n); VKInst2Funcs #undef VKFunc #endif //set up debug callbacks if (vk_debug.ival) { #ifdef VK_EXT_debug_utils vkCreateDebugUtilsMessengerEXT = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vk.instance, "vkCreateDebugUtilsMessengerEXT"); vkDestroyDebugUtilsMessengerEXT = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(vk.instance, "vkDestroyDebugUtilsMessengerEXT"); if (vkCreateDebugUtilsMessengerEXT) { VkDebugUtilsMessengerCreateInfoEXT dbgCreateInfo; memset(&dbgCreateInfo, 0, sizeof(dbgCreateInfo)); dbgCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; dbgCreateInfo.pfnUserCallback = mydebugutilsmessagecallback; dbgCreateInfo.pUserData = NULL; dbgCreateInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; dbgCreateInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; vkCreateDebugUtilsMessengerEXT(vk.instance, &dbgCreateInfo, vkallocationcb, &vk_debugucallback); } #endif #ifdef VK_EXT_debug_report vkCreateDebugReportCallbackEXT = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(vk.instance, "vkCreateDebugReportCallbackEXT"); vkDestroyDebugReportCallbackEXT = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(vk.instance, "vkDestroyDebugReportCallbackEXT"); if (vkCreateDebugReportCallbackEXT && vkDestroyDebugReportCallbackEXT) { VkDebugReportCallbackCreateInfoEXT dbgCreateInfo; memset(&dbgCreateInfo, 0, sizeof(dbgCreateInfo)); dbgCreateInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT; dbgCreateInfo.pfnCallback = mydebugreportcallback; dbgCreateInfo.pUserData = NULL; dbgCreateInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | /* VK_DEBUG_REPORT_INFORMATION_BIT_EXT | */ VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | VK_DEBUG_REPORT_DEBUG_BIT_EXT; vkCreateDebugReportCallbackEXT(vk.instance, &dbgCreateInfo, vkallocationcb, &vk_debugcallback); } #endif } //create the platform-specific surface createSurface(); //figure out which gpu we're going to use { uint32_t gpucount = 0, i; uint32_t bestpri = ~0u, pri; VkPhysicalDevice *devs; char *s = info->subrenderer; int wantdev = -1; if (*s) { if (!Q_strncasecmp(s, "GPU", 3)) s += 3; wantdev = strtoul(s, &s, 0); if (*s) //its a named device. wantdev = -1; } vkEnumeratePhysicalDevices(vk.instance, &gpucount, NULL); if (!gpucount) { Con_Printf("vulkan: no devices known!\n"); return false; } devs = malloc(sizeof(VkPhysicalDevice)*gpucount); vkEnumeratePhysicalDevices(vk.instance, &gpucount, devs); for (i = 0; i < gpucount; i++) { VkPhysicalDeviceProperties props; uint32_t j, queue_count; vkGetPhysicalDeviceProperties(devs[i], &props); vkGetPhysicalDeviceQueueFamilyProperties(devs[i], &queue_count, NULL); if (vk.khr_swapchain) { for (j = 0; j < queue_count; j++) { VkBool32 supportsPresent = false; VkAssert(vkGetPhysicalDeviceSurfaceSupportKHR(devs[i], j, vk.surface, &supportsPresent)); if (supportsPresent) break; //okay, this one should be usable } if (j == queue_count) { //no queues can present to that surface, so I guess we can't use that device Con_DPrintf("vulkan: ignoring device \"%s\" as it can't present to window\n", props.deviceName); continue; } } Con_DPrintf("Found Vulkan Device \"%s\"\n", props.deviceName); if (!vk.gpu) { gpuidx = i; vk.gpu = devs[i]; } switch(props.deviceType) { default: case VK_PHYSICAL_DEVICE_TYPE_OTHER: pri = 5; break; case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: pri = 2; break; case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: pri = 1; break; case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: pri = 3; break; case VK_PHYSICAL_DEVICE_TYPE_CPU: pri = 4; break; } if (wantdev >= 0) { if (wantdev == i) pri = 0; } else { if (!Q_strcasecmp(props.deviceName, info->subrenderer)) pri = 0; } if (pri < bestpri) { gpuidx = i; vk.gpu = devs[gpuidx]; bestpri = pri; } } free(devs); if (!vk.gpu) { Con_Printf("vulkan: unable to pick a usable device\n"); return false; } } { char *vendor, *type; VkPhysicalDeviceProperties props; vkGetPhysicalDeviceProperties(vk.gpu, &props); switch(props.vendorID) { //explicit registered vendors case 0x10001: vendor = "Vivante"; break; case 0x10002: vendor = "VeriSilicon"; break; //pci vendor ids //there's a lot of pci vendors, some even still exist, but not all of them actually have 3d hardware. //many of these probably won't even be used... Oh well. //anyway, here's some of the ones that are listed case 0x1002: vendor = "AMD"; break; case 0x10DE: vendor = "NVIDIA"; break; case 0x8086: vendor = "Intel"; break; //cute case 0x13B5: vendor = "ARM"; break; case 0x5143: vendor = "Qualcomm"; break; case 0x1AEE: vendor = "Imagination";break; case 0x1957: vendor = "Freescale"; break; //I really have no idea who makes mobile gpus nowadays, but lets make some guesses. case 0x1AE0: vendor = "Google"; break; case 0x5333: vendor = "S3"; break; case 0xA200: vendor = "NEC"; break; case 0x0A5C: vendor = "Broadcom"; break; case 0x1131: vendor = "NXP"; break; case 0x1099: vendor = "Samsung"; break; case 0x10C3: vendor = "Samsung"; break; case 0x11E2: vendor = "Samsung"; break; case 0x1249: vendor = "Samsung"; break; default: vendor = va("VEND_%x", props.vendorID); break; } switch(props.deviceType) { default: case VK_PHYSICAL_DEVICE_TYPE_OTHER: type = "(other)"; break; case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: type = "integrated"; break; case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: type = "discrete"; break; case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: type = "virtual"; break; case VK_PHYSICAL_DEVICE_TYPE_CPU: type = "software"; break; } Con_Printf("Vulkan %u.%u.%u: GPU%i %s %s %s (%u.%u.%u)\n", VK_VERSION_MAJOR(props.apiVersion), VK_VERSION_MINOR(props.apiVersion), VK_VERSION_PATCH(props.apiVersion), gpuidx, type, vendor, props.deviceName, VK_VERSION_MAJOR(props.driverVersion), VK_VERSION_MINOR(props.driverVersion), VK_VERSION_PATCH(props.driverVersion) ); } //figure out which of the device's queue's we're going to use { uint32_t queue_count, i; vkGetPhysicalDeviceQueueFamilyProperties(vk.gpu, &queue_count, NULL); queueprops = malloc(sizeof(VkQueueFamilyProperties)*queue_count); //Oh how I wish I was able to use C99. vkGetPhysicalDeviceQueueFamilyProperties(vk.gpu, &queue_count, queueprops); vk.queuefam[0] = ~0u; vk.queuefam[1] = ~0u; vk.queuenum[0] = 0; vk.queuenum[1] = 0; /* //try to find a 'dedicated' present queue for (i = 0; i < queue_count; i++) { VkBool32 supportsPresent = FALSE; VkAssert(vkGetPhysicalDeviceSurfaceSupportKHR(vk.gpu, i, vk.surface, &supportsPresent)); if (supportsPresent && !(queueprops[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)) { vk.queuefam[1] = i; break; } } if (vk.queuefam[1] != ~0u) { //try to find a good graphics queue for (i = 0; i < queue_count; i++) { if (queueprops[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { vk.queuefam[0] = i; break; } } } else*/ { for (i = 0; i < queue_count; i++) { VkBool32 supportsPresent = false; if (!vk.khr_swapchain) supportsPresent = true; //won't be used anyway. else VkAssert(vkGetPhysicalDeviceSurfaceSupportKHR(vk.gpu, i, vk.surface, &supportsPresent)); if ((queueprops[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) && supportsPresent) { vk.queuefam[0] = i; vk.queuefam[1] = i; break; } else if (vk.queuefam[0] == ~0u && (queueprops[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)) vk.queuefam[0] = i; else if (vk.queuefam[1] == ~0u && supportsPresent) vk.queuefam[1] = i; } } if (vk.queuefam[0] == ~0u || vk.queuefam[1] == ~0u) { free(queueprops); Con_Printf("unable to find suitable queues\n"); return false; } } { uint32_t extcount = 0; VkExtensionProperties *ext; vkEnumerateDeviceExtensionProperties(vk.gpu, NULL, &extcount, NULL); ext = malloc(sizeof(*ext)*extcount); vkEnumerateDeviceExtensionProperties(vk.gpu, NULL, &extcount, ext); while (extcount --> 0) { for (e = 0; e < countof(knowndevexts); e++) { if (!strcmp(ext[extcount].extensionName, knowndevexts[e].name)) { if (knowndevexts[e].var) *knowndevexts[e].flag = !!knowndevexts[e].var->ival || (!*knowndevexts[e].var->string && knowndevexts[e].def); knowndevexts[e].supported = true; } } } free(ext); } { const char *devextensions[1+countof(knowndevexts)]; size_t numdevextensions = 0; float queue_priorities[2] = {0.8, 1.0}; VkDeviceQueueCreateInfo queueinf[2] = {{VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO},{VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO}}; VkDeviceCreateInfo devinf = {VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO}; VkPhysicalDeviceFeatures features; VkPhysicalDeviceFeatures avail; memset(&features, 0, sizeof(features)); vkGetPhysicalDeviceFeatures(vk.gpu, &avail); //try to enable whatever we can use, if we can. features.robustBufferAccess = avail.robustBufferAccess; features.textureCompressionBC = avail.textureCompressionBC; features.textureCompressionETC2 = avail.textureCompressionETC2; features.textureCompressionASTC_LDR = avail.textureCompressionASTC_LDR; features.samplerAnisotropy = avail.samplerAnisotropy; features.geometryShader = avail.geometryShader; features.tessellationShader = avail.tessellationShader; //Add in the extensions we support for (e = 0; e < countof(knowndevexts); e++) { //prints are to let the user know what's going on. only warn if its explicitly enabled if (knowndevexts[e].superseeded && *knowndevexts[e].superseeded) { Con_DPrintf("Superseeded %s.\n", knowndevexts[e].name); *knowndevexts[e].flag = false; } else if (*knowndevexts[e].flag) { Con_DPrintf("Using %s.\n", knowndevexts[e].name); devextensions[numdevextensions++] = knowndevexts[e].name; } else if (knowndevexts[e].var && knowndevexts[e].var->ival) Con_Printf("unable to enable %s extension.%s\n", knowndevexts[e].name, knowndevexts[e].warningtext?knowndevexts[e].warningtext:""); else if (knowndevexts[e].supported) Con_DPrintf("Ignoring %s.\n", knowndevexts[e].name); else Con_DPrintf("Unavailable %s.\n", knowndevexts[e].name); } queueinf[0].pNext = NULL; queueinf[0].queueFamilyIndex = vk.queuefam[0]; queueinf[0].queueCount = 1; queueinf[0].pQueuePriorities = &queue_priorities[0]; queueinf[1].pNext = NULL; queueinf[1].queueFamilyIndex = vk.queuefam[1]; queueinf[1].queueCount = 1; queueinf[1].pQueuePriorities = &queue_priorities[1]; if (vk.queuefam[0] == vk.queuefam[1]) { devinf.queueCreateInfoCount = 1; if (queueprops[queueinf[0].queueFamilyIndex].queueCount >= 2 && vk_dualqueue.ival) { queueinf[0].queueCount = 2; vk.queuenum[1] = 1; Con_DPrintf("Using duel queue\n"); } else { queueinf[0].queueCount = 1; if (vk.khr_swapchain) vk.dopresent = VK_DoPresent; //can't split submit+present onto different queues, so do these on a single thread. Con_DPrintf("Using single queue\n"); } } else { devinf.queueCreateInfoCount = 2; Con_DPrintf("Using separate queue families\n"); } free(queueprops); devinf.pQueueCreateInfos = queueinf; devinf.enabledLayerCount = vklayercount; devinf.ppEnabledLayerNames = vklayerlist; devinf.enabledExtensionCount = numdevextensions; devinf.ppEnabledExtensionNames = devextensions; devinf.pEnabledFeatures = &features; #if 0 if (vkEnumeratePhysicalDeviceGroupsKHR && vk_afr.ival) { //'Every physical device must be in exactly one device group'. So we can just use the first group that lists it and automatically get AFR. uint32_t gpugroups = 0; VkDeviceGroupDeviceCreateInfoKHX dgdci = {VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO_KHR}; VkPhysicalDeviceGroupPropertiesKHR *groups; vkEnumeratePhysicalDeviceGroupsKHR(vk.instance, &gpugroups, NULL); groups = malloc(sizeof(*groups)*gpugroups); vkEnumeratePhysicalDeviceGroupsKHR(vk.instance, &gpugroups, groups); for (i = 0; i < gpugroups; i++) { for (j = 0; j < groups[i].physicalDeviceCount; j++) if (groups[i].physicalDevices[j] == vk.gpu) { dgdci.physicalDeviceCount = groups[i].physicalDeviceCount; dgdci.pPhysicalDevices = groups[i].physicalDevices; break; } } if (dgdci.physicalDeviceCount > 1) { vk.subdevices = dgdci.physicalDeviceCount; dgdci.pNext = devinf.pNext; devinf.pNext = &dgdci; } err = vkCreateDevice(vk.gpu, &devinf, NULL, &vk.device); free(groups); } else #endif err = vkCreateDevice(vk.gpu, &devinf, NULL, &vk.device); switch(err) { case VK_ERROR_INCOMPATIBLE_DRIVER: Con_Printf("VK_ERROR_INCOMPATIBLE_DRIVER: please install an appropriate vulkan driver\n"); return false; case VK_ERROR_EXTENSION_NOT_PRESENT: Con_Printf("VK_ERROR_EXTENSION_NOT_PRESENT: something on a system level is probably misconfigured\n"); return false; default: Con_Printf("Unknown vulkan device creation error: %x\n", err); return false; case VK_SUCCESS: break; } } #ifdef VK_NO_PROTOTYPES vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetInstanceProcAddr(vk.instance, "vkGetDeviceProcAddr"); #define VKFunc(n) vk##n = (PFN_vk##n)vkGetDeviceProcAddr(vk.device, "vk"#n); VKDevFuncs #undef VKFunc #endif vkGetDeviceQueue(vk.device, vk.queuefam[0], vk.queuenum[0], &vk.queue_render); vkGetDeviceQueue(vk.device, vk.queuefam[1], vk.queuenum[1], &vk.queue_present); vkGetPhysicalDeviceMemoryProperties(vk.gpu, &vk.memory_properties); { VkCommandPoolCreateInfo cpci = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; cpci.queueFamilyIndex = vk.queuefam[0]; cpci.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT|VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; VkAssert(vkCreateCommandPool(vk.device, &cpci, vkallocationcb, &vk.cmdpool)); } sh_config.progpath = NULL; sh_config.blobpath = "spirv"; sh_config.shadernamefmt = NULL;//".spv"; if (vk.nv_glsl_shader) { sh_config.progpath = "glsl/%s.glsl"; sh_config.shadernamefmt = "%s_glsl"; } sh_config.progs_supported = true; sh_config.progs_required = true; sh_config.minver = -1; sh_config.maxver = -1; sh_config.texture_allow_block_padding = true; sh_config.texture_non_power_of_two = true; //is this always true? sh_config.texture_non_power_of_two_pic = true; //probably true... sh_config.npot_rounddown = false; sh_config.tex_env_combine = false; //fixme: figure out what this means... sh_config.nv_tex_env_combine4 = false; //fixme: figure out what this means... sh_config.env_add = false; //fixme: figure out what this means... sh_config.can_mipcap = true; sh_config.havecubemaps = true; VK_CheckTextureFormats(); sh_config.pDeleteProg = NULL; sh_config.pLoadBlob = NULL; if (vk.nv_glsl_shader) sh_config.pCreateProgram = VK_LoadGLSL; else sh_config.pCreateProgram = NULL; sh_config.pValidateProgram = NULL; sh_config.pProgAutoFields = NULL; if (sh_config.texfmt[PTI_DEPTH32]) vk.depthformat = VK_FORMAT_D32_SFLOAT; else if (sh_config.texfmt[PTI_DEPTH24]) vk.depthformat = VK_FORMAT_X8_D24_UNORM_PACK32; else if (sh_config.texfmt[PTI_DEPTH24_8]) vk.depthformat = VK_FORMAT_D24_UNORM_S8_UINT; else //16bit depth is guarenteed in vulkan vk.depthformat = VK_FORMAT_D16_UNORM; #ifdef MULTITHREAD vk.submitcondition = Sys_CreateConditional(); #endif { VkPipelineCacheCreateInfo pci = {VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO}; qofs_t size = 0; pci.pInitialData = FS_MallocFile("vulkan.pcache", FS_ROOT, &size); pci.initialDataSize = size; VkAssert(vkCreatePipelineCache(vk.device, &pci, vkallocationcb, &vk.pipelinecache)); FS_FreeFile((void*)pci.pInitialData); } if (VK_CreateSwapChain()) { vk.neednewswapchain = false; #ifdef MULTITHREAD if (vk.allowsubmissionthread && (vk_submissionthread.ival || !*vk_submissionthread.string)) { vk.submitthread = Sys_CreateThread("vksubmission", VK_Submit_Thread, NULL, THREADP_HIGHEST, 0); } #endif } if (info->srgb != 1 && (vid.flags & VID_SRGB_FB)) vid.flags |= VID_SRGBAWARE; return true; } void VK_Shutdown(void) { uint32_t i; VK_DestroySwapChain(); for (i = 0; i < countof(postproc); i++) VKBE_RT_Gen(&postproc[i], 0, 0, false, RT_IMAGEFLAGS); VKBE_RT_Gen_Cube(&vk_rt_cubemap, 0, false); VK_R_BloomShutdown(); if (vk.cmdpool) vkDestroyCommandPool(vk.device, vk.cmdpool, vkallocationcb); VK_DestroyRenderPass(); if (vk.pipelinecache) { size_t size; if (VK_SUCCESS == vkGetPipelineCacheData(vk.device, vk.pipelinecache, &size, NULL)) { void *ptr = Z_Malloc(size); //valgrind says nvidia isn't initialising this. if (VK_SUCCESS == vkGetPipelineCacheData(vk.device, vk.pipelinecache, &size, ptr)) FS_WriteFile("vulkan.pcache", ptr, size, FS_ROOT); Z_Free(ptr); } vkDestroyPipelineCache(vk.device, vk.pipelinecache, vkallocationcb); } while(vk.mempools) { void *l; vkFreeMemory(vk.device, vk.mempools->memory, vkallocationcb); l = vk.mempools; vk.mempools = vk.mempools->next; Z_Free(l); } if (vk.device) vkDestroyDevice(vk.device, vkallocationcb); #ifdef VK_EXT_debug_utils if (vk_debugucallback) { vkDestroyDebugUtilsMessengerEXT(vk.instance, vk_debugucallback, vkallocationcb); vk_debugucallback = VK_NULL_HANDLE; } #endif #ifdef VK_EXT_debug_report if (vk_debugcallback) { vkDestroyDebugReportCallbackEXT(vk.instance, vk_debugcallback, vkallocationcb); vk_debugcallback = VK_NULL_HANDLE; } #endif if (vk.surface) vkDestroySurfaceKHR(vk.instance, vk.surface, vkallocationcb); if (vk.instance) vkDestroyInstance(vk.instance, vkallocationcb); #ifdef MULTITHREAD if (vk.submitcondition) Sys_DestroyConditional(vk.submitcondition); #endif memset(&vk, 0, sizeof(vk)); #ifdef VK_NO_PROTOTYPES #define VKFunc(n) vk##n = NULL; VKFuncs #undef VKFunc #endif } #endif