Implicit surface rendering via ray tracing
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

568 lines
20 KiB

/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <platform.h>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//--------------------------------------------------------------------------------------------------
/**
# functions in nvvk
- makeAccessMaskPipelineStageFlags : depending on accessMask returns appropriate VkPipelineStageFlagBits
- cmdBegin : wraps vkBeginCommandBuffer with VkCommandBufferUsageFlags and implicitly handles VkCommandBufferBeginInfo setup
- makeSubmitInfo : VkSubmitInfo struct setup using provided arrays of signals and commandbuffers, leaving rest zeroed
*/
// Useful for barriers: derives all compatible pipeline stage flags from an access mask.
//   accessMask          - access flag bits to translate, passed as a raw 32-bit mask.
//   supportedShaderBits - shader stage bits to consider; defaults to vertex, tessellation
//                         control/evaluation, geometry, fragment and compute.
//                         NOTE(review): presumably limits which shader stages can appear in
//                         the result for shader accesses - confirm against the definition.
// Returns the resulting stage flags as a raw 32-bit mask.
uint32_t makeAccessMaskPipelineStageFlags(uint32_t accessMask,
VkPipelineStageFlags supportedShaderBits = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
| VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
| VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
// Wraps vkBeginCommandBuffer for `cmd`: the VkCommandBufferBeginInfo setup is handled
// implicitly, using `flags` as the usage flags (one-time-submit unless overridden).
void cmdBegin(VkCommandBuffer cmd, VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
// Builds a VkSubmitInfo that references the given command buffers and signal semaphores.
//   numCmds / cmds       - number of command buffers and pointer to the first one
//   numSignals / signals - number of signal semaphores and pointer to the first one
// Every other field (wait semaphores, pNext, ...) is left zeroed. The arrays are only
// referenced, not copied.
inline VkSubmitInfo makeSubmitInfo(uint32_t numCmds, VkCommandBuffer* cmds, uint32_t numSignals, VkSemaphore* signals)
{
  VkSubmitInfo info{};  // zero everything first
  info.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  info.commandBufferCount   = numCmds;
  info.pCommandBuffers      = cmds;
  info.signalSemaphoreCount = numSignals;
  info.pSignalSemaphores    = signals;
  return info;
}
//--------------------------------------------------------------------------------------------------
/**
\class nvvk::CommandPool
nvvk::CommandPool stores a single VkCommandPool and provides utility functions
to create VkCommandBuffers from it.
Example:
\code{.cpp}
{
nvvk::CommandPool cmdPool;
cmdPool.init(...);
// some setup/one shot work
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
// trigger execution with a blocking operation
// not recommended for performance
// but useful for sample setup
cmdPool.submitAndWait(cmd, queue);
}
// other cmds you may batch, or recycle
std::vector<VkCommandBuffer> cmds;
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
cmds.push_back(cmd);
}
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
cmds.push_back(cmd);
}
// do some form of batched submission of cmds
// after completion destroy cmd
cmdPool.destroy(cmds.size(), cmds.data());
cmdPool.deinit();
}
\endcode
*/
class CommandPool
{
public:
  // Copying is disabled.
  CommandPool(CommandPool const&) = delete;
  CommandPool& operator=(CommandPool const&) = delete;

  // Creates an empty object (all handles VK_NULL_HANDLE); call init() before use.
  CommandPool() = default;

  // Calls deinit() on destruction.
  ~CommandPool() { deinit(); }

  // Convenience constructor forwarding to init().
  // If defaultQueue is null, the first queue from familyIndex is used as default.
  CommandPool(VkDevice                 device,
              uint32_t                 familyIndex,
              VkCommandPoolCreateFlags flags        = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
              VkQueue                  defaultQueue = VK_NULL_HANDLE)
  {
    init(device, familyIndex, flags, defaultQueue);
  }

  // If defaultQueue is null, the first queue from familyIndex is used as default.
  void init(VkDevice                 device,
            uint32_t                 familyIndex,
            VkCommandPoolCreateFlags flags        = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
            VkQueue                  defaultQueue = VK_NULL_HANDLE);
  void deinit();

  // Creates a command buffer from this pool; with `begin` set, `flags` and
  // `pInheritanceInfo` are the arguments for beginning the recording.
  VkCommandBuffer createCommandBuffer(VkCommandBufferLevel                  level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
                                      bool                                  begin = true,
                                      VkCommandBufferUsageFlags             flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
                                      const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr);

  // Frees command buffers that were created from this pool.
  void destroy(size_t count, const VkCommandBuffer* cmds);
  void destroy(const std::vector<VkCommandBuffer>& cmds)
  {
    const VkCommandBuffer* first = cmds.data();
    destroy(cmds.size(), first);
  }
  void destroy(VkCommandBuffer cmd)
  {
    // single-element array view onto the by-value parameter
    destroy(1, &cmd);
  }

  // Access to the underlying pool handle.
  VkCommandPool getCommandPool() const { return m_commandPool; }

  // Ends command buffer recording and submits to a queue. If 'fence' is not
  // VK_NULL_HANDLE, it is used to signal completion of the command buffer
  // execution. Does NOT destroy the command buffers!
  // This is not an optimal way to submit work, as it may lead to a large
  // number of vkQueueSubmit() calls per frame. Consider batching submissions
  // via the FencedCommandPools and BatchSubmission classes further below.
  // The overloads without a 'queue' parameter do not name a queue explicitly.
  void submit(size_t count, const VkCommandBuffer* cmds, VkQueue queue, VkFence fence = VK_NULL_HANDLE);
  void submit(size_t count, const VkCommandBuffer* cmds, VkFence fence = VK_NULL_HANDLE);
  void submit(const std::vector<VkCommandBuffer>& cmds, VkFence fence = VK_NULL_HANDLE);

  // Ends command buffer recording, submits to 'queue', waits for the queue to
  // become idle and destroys the command buffers.
  // Non-optimal usage pattern because of the idle wait; avoid in production
  // and prefer the FencedCommandPools and BatchSubmission classes further below.
  void submitAndWait(size_t count, const VkCommandBuffer* cmds, VkQueue queue);
  void submitAndWait(const std::vector<VkCommandBuffer>& cmds, VkQueue queue)
  {
    const VkCommandBuffer* first = cmds.data();
    submitAndWait(cmds.size(), first, queue);
  }
  void submitAndWait(VkCommandBuffer cmd, VkQueue queue)
  {
    submitAndWait(1, &cmd, queue);
  }

  // Same as above, but submits to the default queue (m_queue).
  void submitAndWait(size_t count, const VkCommandBuffer* cmds)
  {
    submitAndWait(count, cmds, m_queue);
  }
  void submitAndWait(const std::vector<VkCommandBuffer>& cmds)
  {
    submitAndWait(cmds, m_queue);
  }
  void submitAndWait(VkCommandBuffer cmd)
  {
    submitAndWait(cmd, m_queue);
  }

protected:
  VkDevice      m_device      = VK_NULL_HANDLE;
  VkQueue       m_queue       = VK_NULL_HANDLE;  // default queue used by the queue-less submitAndWait overloads
  VkCommandPool m_commandPool = VK_NULL_HANDLE;
};
//--------------------------------------------------------------------------------------------------
/**
\class nvvk::ScopeCommandBuffer
nvvk::ScopeCommandBuffer provides a single VkCommandBuffer that lives within the scope
and is directly submitted and deleted when the scope is left.
Not recommended for efficiency, since it results in a blocking
operation, but aids sample writing.
Example:
\code{.cpp}
{
ScopeCommandBuffer cmd(device, queueFamilyIndex, queue);
... do stuff
vkCmdCopyBuffer(cmd, ...);
}
\endcode
*/
class ScopeCommandBuffer : public CommandPool
{
public:
  // Sets up a transient command pool and immediately creates one command
  // buffer from it using createCommandBuffer()'s default arguments.
  // If queue is null, the first queue from familyIndex is used.
  ScopeCommandBuffer(VkDevice device, uint32_t familyIndex, VkQueue queue = VK_NULL_HANDLE)
      : CommandPool(device, familyIndex, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queue)
      , m_cmd(createCommandBuffer())
  {
  }

  // Leaving the scope submits the command buffer to the default queue via
  // submitAndWait(); the base class destructor runs afterwards.
  ~ScopeCommandBuffer() { submitAndWait(m_cmd); }

  // Allows passing this object directly to vkCmd* functions.
  operator VkCommandBuffer() const { return m_cmd; }

private:
  VkCommandBuffer m_cmd;  // the single command buffer managed by this scope
};
//--------------------------------------------------------------------------------------------------
/**
\classes **nvvk::Ring...**
In real-time processing, the CPU typically generates commands
ahead of the GPU and sends them in batches for execution.
To avoid having the CPU wait for the GPU's completion and to let it "race ahead",
we make use of double- or triple-buffering techniques, where we cycle through
a pool of resources every frame. We know that those resources are currently
not in use by the GPU and can therefore manipulate them directly.
Especially in Vulkan it is the developer's responsibility to avoid
accessing resources that are still in-flight.
The "Ring" classes cycle through a pool of resources. The default value
is set to allow two frames in-flight, assuming one fence is used per-frame.
*/
// Default number of cycles used by the Ring* classes.
// During swapchain operations the driver typically does not let the CPU
// race ahead of the GPU by more than two frames.
static constexpr uint32_t DEFAULT_RING_SIZE = 3;
//--------------------------------------------------------------------------------------------------
/**
#\class nvvk::RingFences
nvvk::RingFences recycles a fixed number of fences, provides information in which cycle
we are currently at, and prevents accidental access to a cycle in-flight.
A typical frame would start by "setCycleAndWait", which waits for the
requested cycle to be available.
*/
class RingFences
{
public:
RingFences(RingFences const&) = delete;
RingFences& operator=(RingFences const&) = delete;
RingFences() {}
RingFences(VkDevice device, uint32_t ringSize = DEFAULT_RING_SIZE) { init(device, ringSize); }
~RingFences() { deinit(); }
void init(VkDevice device, uint32_t ringSize = DEFAULT_RING_SIZE);
void deinit();
void reset()
{
VkDevice device = m_device;
uint32_t ringSize = m_cycleSize;
deinit();
init(device, ringSize);
}
// ensures the availability of the passed cycle
void setCycleAndWait(uint32_t cycle);
// get current cycle fence
VkFence getFence();
// query current cycle index
uint32_t getCycleIndex() const { return m_cycleIndex; }
uint32_t getCycleSize() const { return m_cycleSize; }
private:
struct Entry
{
VkFence fence;
bool active;
};
uint32_t m_cycleIndex{0};
uint32_t m_cycleSize{0};
std::vector<Entry> m_fences;
VkDevice m_device = VK_NULL_HANDLE;
};
//--------------------------------------------------------------------------------------------------
/**
#\class nvvk::RingCommandPool
nvvk::RingCommandPool manages a fixed cycle set of VkCommandPools and
one-shot command buffers allocated from them.
The usage of multiple command buffer pools also means we get nice allocation
behavior (linear allocation from frame start to frame end) without fragmentation.
If we were using a single command pool over multiple frames, it could fragment easily.
You must ensure cycle is available manually, typically by keeping in sync
with ring fences.
Example:
\code{.cpp}
{
frame++;
// wait until we can use the new cycle
// (very rare if we use the fence at the end once per-frame)
ringFences.setCycleAndWait( frame );
// update cycle state, allows recycling of old resources
ringPool.setCycle( frame );
VkCommandBuffer cmd = ringPool.createCommandBuffer(...);
... do stuff / submit etc...
VkFence fence = ringFences.getFence();
// use this fence in the submit
vkQueueSubmit(...fence..);
}
\endcode
*/
class RingCommandPool
{
public:
RingCommandPool(RingCommandPool const&) = delete;
RingCommandPool& operator=(RingCommandPool const&) = delete;
RingCommandPool(VkDevice device,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
init(device, queueFamilyIndex, flags, ringSize);
}
RingCommandPool() {}
~RingCommandPool() { deinit(); }
void init(VkDevice device,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE);
void deinit();
void reset()
{
VkDevice device = m_device;
VkCommandPoolCreateFlags flags = m_flags;
uint32_t queueFamilyIndex = m_familyIndex;
uint32_t ringSize = m_cycleSize;
deinit();
init(device, queueFamilyIndex, flags, ringSize);
}
// call when cycle has changed, prior creating command buffers
// resets old pools etc.
void setCycle(uint32_t cycle);
// ensure proper cycle or frame is set prior these
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr);
// pointer is only valid until next create
const VkCommandBuffer* createCommandBuffers(VkCommandBufferLevel level, uint32_t count);
protected:
struct Entry
{
VkCommandPool pool{};
std::vector<VkCommandBuffer> cmds;
};
uint32_t m_cycleIndex{0};
uint32_t m_cycleSize{0};
std::vector<Entry> m_pools;
VkDevice m_device = VK_NULL_HANDLE;
VkCommandPoolCreateFlags m_flags{0};
uint32_t m_familyIndex{0};
};
//--------------------------------------------------------------------------------------------------
/**
\class nvvk::BatchSubmission
nvvk::BatchSubmission batches the submission arguments of VkSubmitInfo for VkQueueSubmit.
vkQueueSubmit is a rather costly operation (depending on OS)
and should not be called too often (e.g. aim for < 10 calls per frame). Therefore
this utility class allows adding command buffers, semaphores etc. and
submitting them later in a batch.
When using manual locks, it can also be useful to feed commandbuffers
from different threads and then later kick it off.
Example
\code{.cpp}
// within upload logic
{
semTransfer = handleUpload(...);
// for example trigger async upload on transfer queue here
vkQueueSubmit(... semTransfer ...);
// tell next frame's batch submission
// that its commandbuffers should wait for transfer
// to be completed
graphicsSubmission.enqueueWait(semTransfer, ... wait stage flags ...);
}
// within present logic
{
// for example ensure the next frame waits until proper present semaphore was triggered
graphicsSubmission.enqueueWait(presentSemaphore, ... wait stage flags ...);
}
// within drawing logic
{
// enqueue some graphics work for submission
graphicsSubmission.enqueue(getSceneCmdBuffer());
graphicsSubmission.enqueue(getUiCmdBuffer());
graphicsSubmission.execute(frameFence);
}
\endcode
*/
class BatchSubmission
{
private:
  // Handles use VK_NULL_HANDLE rather than nullptr: non-dispatchable handles
  // (e.g. VkFence, VkSemaphore) are plain 64-bit integers on some targets,
  // where nullptr does not convert.
  VkQueue                           m_queue = VK_NULL_HANDLE;  // queue returned by getQueue()
  std::vector<VkSemaphore>          m_waits;                   // pending wait semaphores
  std::vector<VkPipelineStageFlags> m_waitFlags;  // NOTE(review): presumably one entry per m_waits element - confirm in the .cpp
  std::vector<VkSemaphore>          m_signals;    // pending signal semaphores
  std::vector<VkCommandBuffer>      m_commands;   // pending command buffers

public:
  // Copying is disabled.
  BatchSubmission(BatchSubmission const&) = delete;
  BatchSubmission& operator=(BatchSubmission const&) = delete;

  // Creates an object without a queue; call init() before use.
  BatchSubmission() {}

  // Convenience constructor forwarding to init().
  BatchSubmission(VkQueue queue) { init(queue); }

  // Number of command buffers currently stored in the pending list.
  uint32_t getCommandBufferCount() const { return uint32_t(m_commands.size()); }

  VkQueue getQueue() const { return m_queue; }

  // Can change the queue if nothing is pending.
  void init(VkQueue queue);

  // Add command buffers to the pending batch.
  void enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers);
  void enqueue(VkCommandBuffer cmdbuffer);

  // Add a semaphore for the batch to signal.
  void enqueueSignal(VkSemaphore sem);

  // Add a semaphore for the batch to wait on, together with stage flags.
  void enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag);

  // Submits the work and resets internal state.
  //   fence      - optional fence passed along with the submission
  //   deviceMask - NOTE(review): presumably a device-group mask, 0 by default - confirm in the .cpp
  VkResult execute(VkFence fence = VK_NULL_HANDLE, uint32_t deviceMask = 0);

  void waitIdle() const;
};
//////////////////////////////////////////////////////////////////////////
/**
\class nvvk::FencedCommandPools
nvvk::FencedCommandPools container class contains the typical utilities to handle
command submission. It contains RingFences, RingCommandPool and BatchSubmission
with a convenient interface.
*/
class FencedCommandPools : protected RingFences, protected RingCommandPool, protected BatchSubmission
{
public:
FencedCommandPools(FencedCommandPools const&) = delete;
FencedCommandPools& operator=(FencedCommandPools const&) = delete;
FencedCommandPools() {}
~FencedCommandPools() { deinit(); }
FencedCommandPools(VkDevice device,
VkQueue queue,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
init(device, queue, queueFamilyIndex, flags, ringSize);
}
void init(VkDevice device,
VkQueue queue,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
RingFences::init(device, ringSize);
RingCommandPool::init(device, queueFamilyIndex, flags, ringSize);
BatchSubmission::init(queue);
}
void deinit()
{
RingFences::deinit();
RingCommandPool::deinit();
//BatchSubmission::deinit();
}
void reset()
{
waitIdle();
RingFences::reset();
RingCommandPool::reset();
}
void enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers) { BatchSubmission::enqueue(num, cmdbuffers); }
void enqueue(VkCommandBuffer cmdbuffer) { BatchSubmission::enqueue(cmdbuffer); }
void enqueueSignal(VkSemaphore sem) { BatchSubmission::enqueueSignal(sem); }
void enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag) { BatchSubmission::enqueueWait(sem, flag); }
VkResult execute(uint32_t deviceMask = 0) { return BatchSubmission::execute(getFence(), deviceMask); }
void waitIdle() const { BatchSubmission::waitIdle(); }
void setCycleAndWait(uint32_t cycle)
{
RingFences::setCycleAndWait(cycle);
RingCommandPool::setCycle(cycle);
}
// ensure proper cycle is set prior this
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr)
{
return RingCommandPool::createCommandBuffer(level, begin, flags, pInheritanceInfo);
}
// pointer is only valid until next create
const VkCommandBuffer* createCommandBuffers(VkCommandBufferLevel level, uint32_t count)
{
return RingCommandPool::createCommandBuffers(level, count);
}
struct ScopedCmd
{
FencedCommandPools* pCmdPools;
VkCommandBuffer cmd;
ScopedCmd(FencedCommandPools& cp)
{
pCmdPools = &cp;
cmd = cp.createCommandBuffer();
}
~ScopedCmd()
{
vkEndCommandBuffer(cmd);
pCmdPools->enqueue(cmd);
pCmdPools->execute();
pCmdPools->waitIdle();
}
operator VkCommandBuffer() { return cmd; }
};
};
} // namespace nvvk