Implicit surface rendering via ray tracing
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

457 lines
11 KiB

/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "profiler.hpp"
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
//////////////////////////////////////////////////////////////////////////
namespace nvh {
const uint32_t Profiler::CONFIG_DELAY;
const uint32_t Profiler::FRAME_DELAY;
const uint32_t Profiler::START_SECTIONS;
const uint32_t Profiler::MAX_NUM_AVERAGE;
Profiler::Profiler(Profiler* master)
{
m_data = master ? master->m_data : std::shared_ptr<Data>(new Data);
grow(START_SECTIONS);
}
Profiler::Profiler(uint32_t startSections)
{
m_data = std::shared_ptr<Data>(new Data);
grow(startSections);
}
void Profiler::setAveragingSize(uint32_t num)
{
assert(num <= MAX_NUM_AVERAGE);
m_data->numAveraging = num;
for(size_t i = 0; i < m_data->entries.size(); i++)
{
m_data->entries[i].cpuTime.init(num);
m_data->entries[i].gpuTime.init(num);
}
m_data->cpuTime.init(num);
}
void Profiler::beginFrame()
{
m_data->level = 0;
m_data->nextSection = 0;
m_data->frameSections.clear();
m_data->cpuCurrentTime = -m_clock.getMicroSeconds();
}
void Profiler::endFrame()
{
assert(m_data->level == 0);
m_data->cpuCurrentTime += m_clock.getMicroSeconds();
if(!m_data->frameSections.empty() && ((uint32_t)m_data->frameSections.size() != m_data->numLastEntries))
{
m_data->numLastEntries = (uint32_t)m_data->frameSections.size();
m_data->numLastSections = m_data->frameSections.back() + 1;
m_data->resetDelay = CONFIG_DELAY;
}
if(m_data->resetDelay)
{
m_data->resetDelay--;
for(uint32_t i = 0; i < m_data->entries.size(); i++)
{
Entry& entry = m_data->entries[i];
if(entry.level != LEVEL_SINGLESHOT)
{
entry.numTimes = 0;
entry.cpuTime.reset();
entry.gpuTime.reset();
}
}
m_data->cpuTime.reset();
m_data->numFrames = 0;
}
if(m_data->numFrames > FRAME_DELAY)
{
for(uint32_t i : m_data->frameSections)
{
Entry& entry = m_data->entries[i];
if(entry.splitter)
continue;
uint32_t queryFrame = (m_data->numFrames + 1) % FRAME_DELAY;
bool available = entry.api.empty() || entry.gpuTimeProvider(i, queryFrame, entry.gpuTimes[queryFrame]);
if(available)
{
entry.cpuTime.add(entry.cpuTimes[queryFrame]);
entry.gpuTime.add(entry.gpuTimes[queryFrame]);
entry.numTimes++;
}
}
for(uint32_t i : m_data->singleSections)
{
Entry& entry = m_data->entries[i];
uint32_t queryFrame = entry.subFrame;
// query once
bool available = entry.cpuTime.numValid == 0
&& (entry.api.empty() || entry.gpuTimeProvider(i, queryFrame, entry.gpuTimes[queryFrame]));
if(available)
{
entry.cpuTime.add(entry.cpuTimes[queryFrame]);
entry.gpuTime.add(entry.gpuTimes[queryFrame]);
entry.numTimes++;
}
}
m_data->cpuTime.add(m_data->cpuCurrentTime);
}
m_data->numFrames++;
}
void Profiler::grow(uint32_t newsize)
{
size_t oldsize = m_data->entries.size();
if(oldsize == newsize)
{
return;
}
m_data->entries.resize(newsize);
for(size_t i = oldsize; i < newsize; i++)
{
m_data->entries[i].cpuTime.init(m_data->numAveraging);
m_data->entries[i].gpuTime.init(m_data->numAveraging);
}
}
void Profiler::clear()
{
m_data->entries.clear();
m_data->singleSections.clear();
}
void Profiler::reset(uint32_t delay)
{
m_data->resetDelay = delay;
}
static std::string format(const char* msg, ...)
{
std::size_t const STRING_BUFFER(8192);
char text[STRING_BUFFER];
va_list list;
if(msg == 0)
return std::string();
va_start(list, msg);
#ifdef _WIN32
vsprintf_s(text, msg, list);
#else // #ifdef _WIN32
vsprintf(text, msg, list);
#endif
va_end(list);
return std::string(text);
}
bool Profiler::getTimerInfo(uint32_t i, TimerInfo& info)
{
Entry& entry = m_data->entries[i];
if(!entry.numTimes || entry.accumulated)
{
return false;
}
info.gpu.average = entry.gpuTime.getAveraged();
info.cpu.average = entry.cpuTime.getAveraged();
info.cpu.absMinValue = entry.cpuTime.absMinValue;
info.cpu.absMaxValue = entry.cpuTime.absMaxValue;
info.gpu.absMinValue = entry.gpuTime.absMinValue;
info.gpu.absMaxValue = entry.gpuTime.absMaxValue;
bool found = false;
for(uint32_t n = i + 1; n < m_data->numLastSections; n++)
{
Entry& otherentry = m_data->entries[n];
if(otherentry.name == entry.name && otherentry.level == entry.level && otherentry.api == entry.api && !otherentry.accumulated)
{
found = true;
info.gpu.average += otherentry.gpuTime.getAveraged();
info.cpu.average += otherentry.cpuTime.getAveraged();
info.cpu.absMinValue += entry.cpuTime.absMinValue;
info.cpu.absMaxValue += entry.cpuTime.absMaxValue;
info.gpu.absMinValue += entry.gpuTime.absMinValue;
info.gpu.absMaxValue += entry.gpuTime.absMaxValue;
otherentry.accumulated = true;
}
if(otherentry.splitter && otherentry.level <= entry.level)
break;
}
info.accumulated = found;
info.numAveraged = entry.cpuTime.numValid;
return true;
}
bool Profiler::getTimerInfo(const char* name, TimerInfo& info)
{
if(name == nullptr)
{
info = TimerInfo();
if(!m_data->cpuTime.numValid)
{
return false;
}
info.cpu.average = m_data->cpuTime.getAveraged();
info.cpu.absMaxValue = m_data->cpuTime.absMaxValue;
info.cpu.absMinValue = m_data->cpuTime.absMinValue;
info.numAveraged = m_data->cpuTime.numValid;
return true;
}
for(uint32_t i = 0; i < m_data->numLastSections; i++)
{
Entry& entry = m_data->entries[i];
entry.accumulated = false;
}
for(uint32_t i = 0; i < (uint32_t)m_data->entries.size(); i++)
{
Entry& entry = m_data->entries[i];
if(entry.name.empty())
continue;
if(name != entry.name)
continue;
return getTimerInfo(i, info);
}
return false;
}
void Profiler::print(std::string& stats)
{
stats.clear();
for(uint32_t i = 0; i < m_data->numLastSections; i++)
{
Entry& entry = m_data->entries[i];
entry.accumulated = false;
}
printf("Timer null;\t N/A %6d; CPU %6d;\n", 0, (uint32_t)m_data->cpuTime.getAveraged());
for(uint32_t i = 0; i < m_data->numLastSections; i++)
{
static const char* spaces = " "; // 8
Entry& entry = m_data->entries[i];
if(entry.level == LEVEL_SINGLESHOT)
continue;
uint32_t level = 7 - (entry.level > 7 ? 7 : entry.level);
TimerInfo info;
if(!getTimerInfo(i, info))
continue;
const char* gpuname = !entry.api.empty() ? entry.api.c_str() : "N/A";
const char* entryname = !entry.name.empty() ? entry.name.c_str() : "N/A";
if(info.accumulated)
{
stats += format("%sTimer %s;\t %s %6d; CPU %6d; (microseconds, accumulated loop)\n", &spaces[level], entryname,
gpuname, (uint32_t)(info.gpu.average), (uint32_t)(info.cpu.average));
}
else
{
stats += format("%sTimer %s;\t %s %6d; CPU %6d; (microseconds, avg %d)\n", &spaces[level], entryname, gpuname,
(uint32_t)(info.gpu.average), (uint32_t)(info.cpu.average), (uint32_t)entry.cpuTime.numValid);
}
}
}
uint32_t Profiler::getTotalFrames() const
{
return m_data->numFrames;
}
void Profiler::accumulationSplit()
{
SectionID sec = getSectionID(false, nullptr);
if(sec >= m_data->entries.size())
{
grow((uint32_t)(m_data->entries.size() * 2));
}
m_data->entries[sec].level = m_data->level;
m_data->entries[sec].splitter = true;
}
Profiler::SectionID Profiler::getSectionID(bool singleShot, const char* name)
{
uint32_t numEntries = (uint32_t)m_data->entries.size();
if(singleShot)
{
// find empty slot or with same name
for(uint32_t i = 0; i < numEntries; i++)
{
Entry& entry = m_data->entries[i];
if(entry.name == name || entry.name.empty())
{
m_data->singleSections.push_back(i);
return i;
}
}
m_data->singleSections.push_back(numEntries);
return numEntries;
}
else
{
// find non-single shot slot
while(m_data->nextSection < numEntries && m_data->entries[m_data->nextSection].level == LEVEL_SINGLESHOT)
{
m_data->nextSection++;
}
m_data->frameSections.push_back(m_data->nextSection);
return m_data->nextSection++;
}
}
Profiler::SectionID Profiler::beginSection(const char* name, const char* api, gpuTimeProvider_fn gpuTimeProvider, bool singleShot)
{
uint32_t subFrame = m_data->numFrames % FRAME_DELAY;
SectionID sec = getSectionID(singleShot, name);
if(sec >= m_data->entries.size())
{
grow((uint32_t)(m_data->entries.size() * 2));
}
Entry& entry = m_data->entries[sec];
uint32_t level = singleShot ? LEVEL_SINGLESHOT : (m_data->level++);
if(entry.name != name || entry.api != api || entry.level != level)
{
entry.name = name;
entry.api = api;
if(!singleShot)
{
m_data->resetDelay = CONFIG_DELAY;
}
}
entry.subFrame = subFrame;
entry.level = level;
entry.splitter = false;
entry.gpuTimeProvider = gpuTimeProvider;
#ifdef NVP_SUPPORTS_NVTOOLSEXT
{
nvtxEventAttributes_t eventAttrib = {0};
eventAttrib.version = NVTX_VERSION;
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
eventAttrib.colorType = NVTX_COLOR_ARGB;
unsigned char color[4];
color[0] = 255;
color[1] = 0;
color[2] = sec % 2 ? 127 : 255;
color[3] = 255;
color[2] -= level * 16;
color[3] -= level * 16;
eventAttrib.color = *(uint32_t*)(color);
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
eventAttrib.message.ascii = name;
nvtxRangePushEx(&eventAttrib);
}
#endif
entry.cpuTimes[subFrame] = -getMicroSeconds();
entry.gpuTimes[subFrame] = 0;
if(singleShot)
{
entry.cpuTime.init(1);
entry.gpuTime.init(1);
}
return sec;
}
void Profiler::endSection(SectionID sec)
{
Entry& entry = m_data->entries[sec];
entry.cpuTimes[entry.subFrame] += getMicroSeconds();
#ifdef NVP_SUPPORTS_NVTOOLSEXT
nvtxRangePop();
#endif
if(entry.level != LEVEL_SINGLESHOT)
{
m_data->level--;
}
}
Profiler::Clock::Clock()
{
m_init = std::chrono::high_resolution_clock::now();
}
double Profiler::Clock::getMicroSeconds() const
{
return double(std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - m_init).count())
/ double(1000);
}
} // namespace nvh