You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
200 lines
5.7 KiB
200 lines
5.7 KiB
/*
|
|
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
|
|
#include "profiler_vk.hpp"
|
|
#include "debug_util_vk.hpp"
|
|
#include "error_vk.hpp"
|
|
#include <assert.h>
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
namespace nvvk {
|
|
|
|
void ProfilerVK::init(VkDevice device, VkPhysicalDevice physicalDevice, int queueFamilyIndex)
|
|
{
|
|
assert(!m_device);
|
|
m_device = device;
|
|
#if 0
|
|
m_useCoreHostReset = supportsCoreHostReset;
|
|
#endif
|
|
|
|
VkPhysicalDeviceProperties properties;
|
|
vkGetPhysicalDeviceProperties(physicalDevice, &properties);
|
|
|
|
m_frequency = properties.limits.timestampPeriod;
|
|
|
|
std::vector<VkQueueFamilyProperties> queueProperties;
|
|
uint32_t queueFamilyCount = 0;
|
|
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, nullptr);
|
|
queueProperties.resize(queueFamilyCount);
|
|
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueProperties.data());
|
|
|
|
uint32_t validBits = queueProperties[queueFamilyIndex].timestampValidBits;
|
|
|
|
m_queueFamilyMask = validBits == 64 ? uint64_t(-1) : ((uint64_t(1) << validBits) - uint64_t(1));
|
|
|
|
|
|
resize();
|
|
}
|
|
|
|
void ProfilerVK::deinit()
|
|
{
|
|
if(m_queryPool)
|
|
{
|
|
vkDestroyQueryPool(m_device, m_queryPool, nullptr);
|
|
m_queryPool = VK_NULL_HANDLE;
|
|
}
|
|
m_device = VK_NULL_HANDLE;
|
|
}
|
|
|
|
void ProfilerVK::setLabelUsage(bool state)
|
|
{
|
|
m_useLabels = state;
|
|
}
|
|
|
|
void ProfilerVK::resize()
|
|
{
|
|
if(getRequiredTimers() < m_queryPoolSize)
|
|
return;
|
|
|
|
if(m_queryPool)
|
|
{
|
|
// FIXME we may loose results this way
|
|
// not exactly efficient, but when timers changed a lot, we have a slow frame anyway
|
|
// cleaner would be allocating more pools
|
|
VkResult result = vkDeviceWaitIdle(m_device);
|
|
if(nvvk::checkResult(result, __FILE__, __LINE__))
|
|
{
|
|
exit(-1);
|
|
}
|
|
vkDestroyQueryPool(m_device, m_queryPool, nullptr);
|
|
}
|
|
|
|
VkQueryPoolCreateInfo createInfo = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
|
|
createInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
|
|
createInfo.queryCount = getRequiredTimers();
|
|
m_queryPoolSize = createInfo.queryCount;
|
|
|
|
VkResult res = vkCreateQueryPool(m_device, &createInfo, nullptr, &m_queryPool);
|
|
assert(res == VK_SUCCESS);
|
|
|
|
nvvk::DebugUtil(m_device).setObjectName(m_queryPool, m_debugName);
|
|
}
|
|
|
|
nvh::Profiler::SectionID ProfilerVK::beginSection(const char* name, VkCommandBuffer cmd, bool singleShot, bool useHostReset)
|
|
{
|
|
nvh::Profiler::gpuTimeProvider_fn fnProvider = [&](SectionID i, uint32_t queryFrame, double& gpuTime) {
|
|
return getSectionTime(i, queryFrame, gpuTime);
|
|
};
|
|
|
|
|
|
SectionID slot = Profiler::beginSection(name, "VK ", fnProvider, singleShot);
|
|
if(getRequiredTimers() > m_queryPoolSize)
|
|
{
|
|
resize();
|
|
}
|
|
if(m_useLabels)
|
|
{
|
|
VkDebugUtilsLabelEXT label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT};
|
|
label.pLabelName = name;
|
|
label.color[1] = 1.0f;
|
|
vkCmdBeginDebugUtilsLabelEXT(cmd, &label);
|
|
}
|
|
#if 0
|
|
else if(m_useMarkers)
|
|
{
|
|
VkDebugMarkerMarkerInfoEXT marker = {VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT};
|
|
marker.pMarkerName = name;
|
|
vkCmdDebugMarkerBeginEXT(cmd, &marker);
|
|
}
|
|
#endif
|
|
|
|
uint32_t idx = getTimerIdx(slot, getSubFrame(slot), true);
|
|
|
|
if(useHostReset)
|
|
{
|
|
#if 0
|
|
if(m_useCoreHostReset)
|
|
{
|
|
vkResetQueryPool(m_device, m_queryPool, idx, 2);
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
vkResetQueryPoolEXT(m_device, m_queryPool, idx, 2);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// not ideal to do this per query
|
|
vkCmdResetQueryPool(cmd, m_queryPool, idx, 2);
|
|
}
|
|
// log timestamp
|
|
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_queryPool, idx);
|
|
|
|
return slot;
|
|
}
|
|
|
|
void ProfilerVK::endSection(SectionID slot, VkCommandBuffer cmd)
|
|
{
|
|
uint32_t idx = getTimerIdx(slot, getSubFrame(slot), false);
|
|
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_queryPool, idx);
|
|
if(m_useLabels)
|
|
{
|
|
vkCmdEndDebugUtilsLabelEXT(cmd);
|
|
}
|
|
#if 0
|
|
else if(m_useMarkers)
|
|
{
|
|
vkCmdDebugMarkerEndEXT(cmd);
|
|
}
|
|
#endif
|
|
Profiler::endSection(slot);
|
|
}
|
|
|
|
|
|
bool ProfilerVK::getSectionTime(SectionID i, uint32_t queryFrame, double& gpuTime)
|
|
{
|
|
bool isRecurring = isSectionRecurring(i);
|
|
uint32_t idxBegin = getTimerIdx(i, queryFrame, true);
|
|
uint32_t idxEnd = getTimerIdx(i, queryFrame, false);
|
|
assert(idxEnd == idxBegin + 1);
|
|
|
|
uint64_t times[2];
|
|
VkResult result = vkGetQueryPoolResults(m_device, m_queryPool, idxBegin, 2, sizeof(uint64_t) * 2, times, sizeof(uint64_t),
|
|
VK_QUERY_RESULT_64_BIT | (isRecurring ? VK_QUERY_RESULT_WAIT_BIT : 0));
|
|
// validation layer bug, complains if VK_QUERY_RESULT_WAIT_BIT is not provided, even if we wait
|
|
// through another fence for the buffer containing the problem
|
|
// fixed in VK SDK fixed with 1.1.126, but we keep old logic still here
|
|
|
|
if(result == VK_SUCCESS)
|
|
{
|
|
uint64_t mask = m_queueFamilyMask;
|
|
gpuTime = (double((times[1] & mask) - (times[0] & mask)) * double(m_frequency)) / double(1000);
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
} // namespace nvvk
|
|
|