SDFRT/3rdparty/nvpro_core/nvvk/sbtwrapper_vk.cpp


								/*

								 * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.

								 *

								 * Licensed under the Apache License, Version 2.0 (the "License");

								 * you may not use this file except in compliance with the License.

								 * You may obtain a copy of the License at

								 *

								 *     http://www.apache.org/licenses/LICENSE-2.0

								 *

								 * Unless required by applicable law or agreed to in writing, software

								 * distributed under the License is distributed on an "AS IS" BASIS,

								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

								 * See the License for the specific language governing permissions and

								 * limitations under the License.

								 *

								 * SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION

								 * SPDX-License-Identifier: Apache-2.0

								 */


								#include "sbtwrapper_vk.hpp"


								#include "nvvk/commands_vk.hpp"

								#include "nvvk/debug_util_vk.hpp"

								#include "nvvk/error_vk.hpp"

								#include "nvh/nvprint.hpp"

								#include "nvh/alignment.hpp"


								using namespace nvvk;


								//--------------------------------------------------------------------------------------------------
								// Default setup: remembers the device, queue family index and allocator, and caches the
								// ray tracing pipeline properties that are needed to lay out the SBT entries.
								//
								void nvvk::SBTWrapper::setup(VkDevice                                               device,
								                             uint32_t                                               familyIndex,
								                             nvvk::ResourceAllocator*                               allocator,
								                             const VkPhysicalDeviceRayTracingPipelinePropertiesKHR& rtProperties)
								{
								  // Shader group properties, used later when computing the stride of each entry
								  m_shaderGroupBaseAlignment = rtProperties.shaderGroupBaseAlignment;
								  m_handleAlignment          = rtProperties.shaderGroupHandleAlignment;  // Alignment in bytes for each SBT entry
								  m_handleSize               = rtProperties.shaderGroupHandleSize;       // Size of a program identifier

								  // Objects used to create, upload and name the SBT buffers
								  m_pAlloc     = allocator;
								  m_queueIndex = familyIndex;
								  m_device     = device;
								  m_debug.setup(device);
								}


								//--------------------------------------------------------------------------------------------------
								// Releases the SBT buffers through the allocator (when one was provided) and resets the
								// handle index list of every group type.
								//
								void SBTWrapper::destroy()
								{
								  // Without an allocator there is nothing we are able to release
								  if(m_pAlloc != nullptr)
								  {
								    for(auto& sbtBuffer : m_buffer)
								    {
								      m_pAlloc->destroy(sbtBuffer);
								    }
								  }

								  // Forget all handle indices, for every group type
								  for(auto& groupIndices : m_index)
								  {
								    groupIndices = {};
								  }
								}


								//--------------------------------------------------------------------------------------------------
								// Records, for each group type, the handle index of every group found in the pipeline
								// creation info (main info first, then each library in order).
								// Example, for a pipeline created like: raygen, miss, hit, miss, hit, hit
								// the result will be: raygen[0], miss[1, 3], hit[2, 4, 5], callable[]
								//
								void SBTWrapper::addIndices(VkRayTracingPipelineCreateInfoKHR                     rayPipelineInfo,
								                            const std::vector<VkRayTracingPipelineCreateInfoKHR>& libraries)
								{
								  // Start from a clean state
								  for(auto& groupIndices : m_index)
								    groupIndices = {};

								  // Groups inside a library are numbered from zero. Once they are part of the final
								  // pipeline they follow the groups of all previous inputs, so each input's local
								  // group index is shifted by the number of groups seen so far.
								  uint32_t firstGroup = 0;

								  // Sorts the groups of one create info into raygen / miss / hit / callable
								  auto classifyGroups = [&](const VkRayTracingPipelineCreateInfoKHR& info) {
								    for(uint32_t g = 0; g < info.groupCount; ++g)
								    {
								      const auto&    group       = info.pGroups[g];
								      const uint32_t handleIndex = g + firstGroup;

								      // Every group that is not 'general' is recorded as a hit group
								      if(group.type != VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
								      {
								        m_index[eHit].push_back(handleIndex);
								        continue;
								      }

								      // A general group is identified by the stage of the shader it refers to
								      const uint32_t genShader = group.generalShader;
								      assert(genShader < info.stageCount);
								      switch(info.pStages[genShader].stage)
								      {
								        case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
								          m_index[eRaygen].push_back(handleIndex);
								          break;
								        case VK_SHADER_STAGE_MISS_BIT_KHR:
								          m_index[eMiss].push_back(handleIndex);
								          break;
								        case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
								          m_index[eCallable].push_back(handleIndex);
								          break;
								        default:
								          break;  // Any other stage is not recorded
								      }
								    }

								    firstGroup += info.groupCount;
								  };

								  // The main VkRayTracingPipelineCreateInfoKHR comes first, the libraries are appended after it
								  classifyGroups(rayPipelineInfo);
								  for(const auto& library : libraries)
								    classifyGroups(library);
								}


								//--------------------------------------------------------------------------------------------------
								// This function creates up to 4 buffers, one per group type (raygen, miss, hit, callable) that
								// has at least one entry. Each buffer stores, for every entry of its group type, the shader
								// handle followed by the 'data (if any)' attached to that entry; entries are `stride` bytes apart.
								//
								// - rtPipeline      : pipeline whose shader group handles are fetched
								// - rayPipelineInfo : if its sType is VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, the
								//                     handle indices are rebuilt from it (and from librariesInfo); otherwise the
								//                     indices already stored in m_index are used as-is
								// - librariesInfo   : create infos of the pipeline libraries; their groups are counted after the
								//                     groups of rayPipelineInfo
								//
								void SBTWrapper::create(VkPipeline                                            rtPipeline,
								                        VkRayTracingPipelineCreateInfoKHR                     rayPipelineInfo /*= {}*/,
								                        const std::vector<VkRayTracingPipelineCreateInfoKHR>& librariesInfo /*= {}*/)
								{
								  // Release whatever the SBT buffers currently hold before rebuilding them
								  for(auto& b : m_buffer)
								    m_pAlloc->destroy(b);

								  // Get the total number of groups and handle index position
								  uint32_t totalGroupCount{0};
								  if(rayPipelineInfo.sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR)
								  {
								    // A pipeline is defined by at least its main VkRayTracingPipelineCreateInfoKHR, plus a number of external libraries
								    addIndices(rayPipelineInfo, librariesInfo);
								    totalGroupCount += rayPipelineInfo.groupCount;
								    for(const auto& lib : librariesInfo)
								    {
								      totalGroupCount += lib.groupCount;
								    }
								  }
								  else
								  {
								    // Find how many groups when added manually, by finding the largest index and adding 1
								    // See also addIndex for manual entries
								    for(const auto& i : m_index)
								    {
								      if(!i.empty())
								        totalGroupCount = std::max(totalGroupCount, *std::max_element(std::begin(i), std::end(i)));
								    }
								    totalGroupCount++;
								  }

								  // Fetch all the shader handles used in the pipeline, so that they can be written in the SBT
								  uint32_t             sbtSize = totalGroupCount * m_handleSize;
								  std::vector<uint8_t> shaderHandleStorage(sbtSize);

								  NVVK_CHECK(vkGetRayTracingShaderGroupHandlesKHR(m_device, rtPipeline, 0, totalGroupCount, sbtSize, shaderHandleStorage.data()));

								  // Find the max stride of a group type: the handle size plus the largest 'data (if any)',
								  // aligned to the handle alignment. The minimum is the aligned handle size alone.
								  // `entry` is taken by const reference so that the data map is not copied on every call.
								  auto findStride = [&](const auto& entry, auto& stride) {
								    stride = nvh::align_up(m_handleSize, m_handleAlignment);  // minimum stride
								    for(const auto& e : entry)
								    {
								      // Find the largest data + handle size, all aligned
								      uint32_t dataHandleSize =
								          nvh::align_up(static_cast<uint32_t>(m_handleSize + e.second.size() * sizeof(uint8_t)), m_handleAlignment);
								      stride = std::max(stride, dataHandleSize);
								    }
								  };
								  findStride(m_data[eRaygen], m_stride[eRaygen]);
								  findStride(m_data[eMiss], m_stride[eMiss]);
								  findStride(m_data[eHit], m_stride[eHit]);
								  findStride(m_data[eCallable], m_stride[eCallable]);

								  // Special case, all Raygen must start aligned on GroupBase
								  m_stride[eRaygen] = nvh::align_up(m_stride[eRaygen], m_shaderGroupBaseAlignment);

								  // Buffer holding the staging information: stride * number of entries, zero-initialized
								  std::array<std::vector<uint8_t>, 4> stage;
								  stage[eRaygen]   = std::vector<uint8_t>(m_stride[eRaygen] * indexCount(eRaygen));
								  stage[eMiss]     = std::vector<uint8_t>(m_stride[eMiss] * indexCount(eMiss));
								  stage[eHit]      = std::vector<uint8_t>(m_stride[eHit] * indexCount(eHit));
								  stage[eCallable] = std::vector<uint8_t>(m_stride[eCallable] * indexCount(eCallable));

								  // Write the handles in the SBT buffer + data info (if any)
								  // Note: `data` is looked up with the position of the entry inside `indices`,
								  // not with the group index stored in `indices`.
								  auto copyHandles = [&](std::vector<uint8_t>& buffer, const std::vector<uint32_t>& indices, uint32_t stride, const auto& data) {
								    auto* pBuffer = buffer.data();
								    for(uint32_t index = 0; index < static_cast<uint32_t>(indices.size()); index++)
								    {
								      auto* pStart = pBuffer;
								      // Copy the handle
								      memcpy(pBuffer, shaderHandleStorage.data() + (indices[index] * m_handleSize), m_handleSize);
								      // If there is data for this entry, copy it right after the handle
								      auto it = data.find(index);
								      if(it != std::end(data))
								      {
								        pBuffer += m_handleSize;
								        memcpy(pBuffer, it->second.data(), it->second.size() * sizeof(uint8_t));
								      }
								      pBuffer = pStart + stride;  // Jumping to next group
								    }
								  };

								  // Copy the handles/data to each staging buffer
								  copyHandles(stage[eRaygen], m_index[eRaygen], m_stride[eRaygen], m_data[eRaygen]);
								  copyHandles(stage[eMiss], m_index[eMiss], m_stride[eMiss], m_data[eMiss]);
								  copyHandles(stage[eHit], m_index[eHit], m_stride[eHit], m_data[eHit]);
								  copyHandles(stage[eCallable], m_index[eCallable], m_stride[eCallable], m_data[eCallable]);

								  // Creating device local buffers where handles will be stored
								  auto usage_flags = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR;
								  auto mem_flags   = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
								  nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
								  VkCommandBuffer   cmdBuf = genCmdBuf.createCommandBuffer();
								  for(uint32_t i = 0; i < 4; i++)
								  {
								    // Group types without any entry get no buffer
								    if(!stage[i].empty())
								    {
								      m_buffer[i] = m_pAlloc->createBuffer(cmdBuf, stage[i], usage_flags, mem_flags);
								      NAME_IDX_VK(m_buffer[i].buffer, i);
								    }
								  }
								  // Wait for the submitted command buffer before releasing the staging resources
								  genCmdBuf.submitAndWait(cmdBuf);
								  m_pAlloc->finalizeAndReleaseStaging();
								}


								//--------------------------------------------------------------------------------------------------
								// Returns the device address of the buffer of the given group type,
								// or 0 when that group type has no buffer.
								//
								VkDeviceAddress SBTWrapper::getAddress(GroupType t)
								{
								  const VkBuffer sbtBuffer = m_buffer[t].buffer;
								  if(sbtBuffer == VK_NULL_HANDLE)
								  {
								    return 0;
								  }

								  VkBufferDeviceAddressInfo addressInfo{};
								  addressInfo.sType  = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
								  addressInfo.pNext  = nullptr;
								  addressInfo.buffer = sbtBuffer;

								  // Aligned on VkMemoryRequirements::alignment which includes shaderGroupBaseAlignment
								  return vkGetBufferDeviceAddress(m_device, &addressInfo);
								}


								//--------------------------------------------------------------------------------------------------
								// Builds the strided address region of the given group type. The start address is moved
								// forward by `indexOffset` entries; stride and size are the ones reported for the group type.
								//
								const VkStridedDeviceAddressRegionKHR SBTWrapper::getRegion(GroupType t, uint32_t indexOffset)
								{
								  VkStridedDeviceAddressRegionKHR region{};
								  region.deviceAddress = getAddress(t) + indexOffset * getStride(t);
								  region.stride        = getStride(t);
								  region.size          = getSize(t);
								  return region;
								}


								const std::array<VkStridedDeviceAddressRegionKHR, 4> SBTWrapper::getRegions(uint32_t rayGenIndexOffset)

								{

								  std::array<VkStridedDeviceAddressRegionKHR, 4> regions{getRegion(eRaygen, rayGenIndexOffset), getRegion(eMiss),

								                                                         getRegion(eHit), getRegion(eCallable)};

								  return regions;

								}