Implicit surface rendering via ray tracing
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

370 lines
10 KiB

/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_PROFILER_INCLUDED
#define NV_PROFILER_INCLUDED
#include <algorithm>
#include <chrono>
#include <float.h> // DBL_MAX
#include <functional>
#include <memory>
#include <stdint.h>
#include <stdio.h>
#include <string.h> //memset
#include <string>
#include <vector>
#ifdef NVP_SUPPORTS_NVTOOLSEXT
#define NVTX_STDINT_TYPES_ALREADY_DEFINED
#include <nvtx3/nvToolsExt.h>
#endif
namespace nvh {
//////////////////////////////////////////////////////////////////////////
/**
\class nvh::Profiler
\brief The nvh::Profiler class is designed to measure timed sections.
Each section has a cpu and gpu time. Gpu times are typically provided
by derived classes for each individual api (e.g. OpenGL, Vulkan etc.).
There is functionality to pretty print the sections with their nesting level.
Multiple profilers can reference the same database, so one profiler
can serve as master that the others contribute to. Typically the
base class measuring only CPU time could be the master, and the api
derived classes reference it to share the same database.
Profiler::Clock can be used standalone for time measuring.
*/
class Profiler
{
public:
  /// if we detect a change in timers (api/name change) we trigger a reset after that amount of frames
  static const uint32_t CONFIG_DELAY = 8;
  /// gpu times are queried after that amount of frames
  static const uint32_t FRAME_DELAY = 4;
  /// by default we start with space for that many begin/end sections per-frame
  static const uint32_t START_SECTIONS = 64;
  /// cyclic window for averaging
  static const uint32_t MAX_NUM_AVERAGE = 128;

public:
  using SectionID = uint32_t;
  using OnceID    = uint32_t;

  // Generic utility class for measuring time.
  // Uses the high resolution timer provided by the OS; can be used standalone.
  class Clock
  {
  public:
    Clock();
    // time in microseconds
    // NOTE(review): presumably relative to m_init; the implementation is not part of this header.
    double getMicroSeconds() const;

  private:
    std::chrono::time_point<std::chrono::high_resolution_clock> m_init;
  };

  //////////////////////////////////////////////////////////////////////////

  // Utility class for automatic calling of begin/end within a local scope.
  //
  // The constructor calls Profiler::beginSection, the destructor calls
  // Profiler::endSection with the returned id. A Section owns that pairing
  // exclusively: it cannot be copied (a copy would end the same section
  // twice), but it can be moved, in which case only the destination ends
  // the section.
  class Section
  {
  public:
    // profiler   : profiler the section is registered with; must outlive this object
    // name       : section name, forwarded to beginSection
    // singleShot : forwarded to beginSection (see comments there)
    Section(Profiler& profiler, const char* name, bool singleShot = false)
        : m_profiler(profiler)
        , m_id(profiler.beginSection(name, nullptr, nullptr, singleShot))
        , m_active(true)
    {
    }

    // Transfers responsibility for calling endSection to the new object.
    Section(Section&& other) noexcept
        : m_profiler(other.m_profiler)
        , m_id(other.m_id)
        , m_active(other.m_active)
    {
      other.m_active = false;
    }

    Section(const Section&) = delete;
    Section& operator=(const Section&) = delete;
    Section& operator=(Section&&) = delete;

    ~Section()
    {
      // a moved-from Section no longer owns the begin/end pairing
      if(m_active)
      {
        m_profiler.endSection(m_id);
      }
    }

  private:
    Profiler& m_profiler;
    SectionID m_id;
    bool      m_active;
  };

  // recurring, must be within beginFrame/endFrame
  Section timeRecurring(const char* name) { return Section(*this, name, false); }
  // single shot, results are available after FRAME_DELAY many endFrame
  Section timeSingle(const char* name) { return Section(*this, name, true); }

  //////////////////////////////////////////////////////////////////////////

  // num <= MAX_NUM_AVERAGE
  void setAveragingSize(uint32_t num);

  //////////////////////////////////////////////////////////////////////////

  // gpu times for a section are queried at "endFrame" with the use of this optional function.
  // It returns true if the queried result was available, and writes the microseconds into gpuTime.
  using gpuTimeProvider_fn = std::function<bool(SectionID, uint32_t subFrame, double& gpuTime)>;

  // must be called every frame
  void beginFrame();
  void endFrame();

  // there are two types of sections
  //  singleShot = true, means the timer can exist outside begin/endFrame and is non-recurring
  //                     results of previous singleShot with same name will be overwritten.
  //  singleShot = false, sections can be nested, but must be within begin/endFrame
  //
  // The returned id must be passed to the matching endSection call.
  SectionID beginSection(const char* name, const char* api = nullptr, gpuTimeProvider_fn gpuTimeProvider = nullptr, bool singleShot = false);
  void      endSection(SectionID slot);

  // When a section is used within a loop (same nesting level), and the same arguments for name and api are
  // passed, we normally average the results of those sections together when printing the stats or using the
  // getAveraged functions below.
  // Calling the splitter (outside of a section) means we insert a split point that the averaging will not
  // pass.
  void accumulationSplit();

  // current time of the profiler's internal clock, in microseconds
  inline double getMicroSeconds() const { return m_clock.getMicroSeconds(); }

  //////////////////////////////////////////////////////////////////////////

  // resets all stats
  void clear();

  // resets recurring sections
  // in case averaging should be reset after a few frames (warm-up cache, hide early heavier frames after
  // configuration changes)
  // implicit resets are triggered if the frame's configuration of timer section changes compared to
  // previous frame.
  void reset(uint32_t delay = CONFIG_DELAY);

  // pretty print current averaged timers
  void print(std::string& stats);

  // returns number of frames since reset
  uint32_t getTotalFrames() const;

  struct TimerStats
  {
    // time in microseconds
    double average     = 0;
    double absMinValue = DBL_MAX;
    double absMaxValue = 0;
  };

  struct TimerInfo
  {
    // number of averaged values, <= MAX_NUM_AVERAGE
    uint32_t numAveraged = 0;

    // accumulation happens for example in loops:
    //   for (..) { auto scopeTimer = timeRecurring("blah"); ... }
    // then the reported values are the accumulated sum of all those timers.
    bool accumulated = false;

    TimerStats cpu;
    TimerStats gpu;
  };

  // query functions for current gathered cyclic averages ( <= MAX_NUM_AVERAGE)
  // use nullptr name to get the cpu timing of the outermost scope (beginFrame/endFrame)
  // returns true if found timer and it had valid values
  bool getTimerInfo(const char* name, TimerInfo& info);

  // simplified wrapper around getTimerInfo:
  // writes the averaged cpu/gpu microseconds and returns true if the timer was found with valid values,
  // otherwise writes 0 to both outputs and returns false.
  bool getAveragedValues(const char* name, double& cpuTime, double& gpuTime)
  {
    TimerInfo  info;
    const bool found = getTimerInfo(name, info);

    cpuTime = found ? info.cpu.average : 0;
    gpuTime = found ? info.gpu.average : 0;
    return found;
  }

  //////////////////////////////////////////////////////////////////////////

  // if a master is provided we use its database
  // otherwise our own
  Profiler(Profiler* master = nullptr);

  Profiler(uint32_t startSections);

protected:
  //////////////////////////////////////////////////////////////////////////

  // Utility functions for derived classes that provide gpu times.
  // We assume most apis use a big pool of api-specific events/timers,
  // the functions below help manage such pool.

  // sub-frame index currently stored for the section; slot must be a valid entry index (not range-checked)
  inline uint32_t getSubFrame(SectionID slot) const { return m_data->entries[slot].subFrame; }

  // two timers (begin/end) per entry for each of the FRAME_DELAY sub-frames
  inline uint32_t getRequiredTimers() const { return static_cast<uint32_t>(m_data->entries.size() * FRAME_DELAY * 2); }

  // index into a pool laid out as [slot][subFrame][begin, end]
  static inline uint32_t getTimerIdx(SectionID slot, uint32_t subFrame, bool begin)
  {
    // must not change order of begin/end
    return ((slot * FRAME_DELAY) + subFrame) * 2 + (begin ? 0 : 1);
  }

  // true unless the entry's level carries the single-shot marker; slot is not range-checked
  inline bool isSectionRecurring(SectionID slot) const { return m_data->entries[slot].level != LEVEL_SINGLESHOT; }

private:
  //////////////////////////////////////////////////////////////////////////

  // marker stored in Entry::level for single-shot sections (all bits set)
  static const uint32_t LEVEL_SINGLESHOT = ~uint32_t(0);

  // Cyclic window of time samples with a running sum plus absolute min/max.
  struct TimeValues
  {
    double   times[MAX_NUM_AVERAGE] = {0};
    double   valueTotal             = 0;        // sum of the samples currently held in the window
    double   absMinValue            = DBL_MAX;  // smallest sample since last reset
    double   absMaxValue            = 0;        // largest sample since last reset
    uint32_t index                  = 0;        // next write position in times
    uint32_t numCycle               = MAX_NUM_AVERAGE;  // window length, kept within [1, MAX_NUM_AVERAGE]
    uint32_t numValid               = 0;        // number of samples stored, <= numCycle

    TimeValues(uint32_t cycleSize = MAX_NUM_AVERAGE) { init(cycleSize); }

    // Sets the window length and clears all samples.
    void init(uint32_t cycleSize)
    {
      // Copy the constant into a local first: std::min/std::max take their arguments
      // by reference, which would odr-use the static member and require an
      // out-of-class definition to link.
      const uint32_t maxCycle = MAX_NUM_AVERAGE;
      // A window of 0 would make add() compute "index % 0"; keep at least one slot.
      numCycle = std::max<uint32_t>(1, std::min(cycleSize, maxCycle));
      reset();
    }

    // Clears samples and statistics, keeps the window length.
    void reset()
    {
      valueTotal  = 0;
      absMinValue = DBL_MAX;
      absMaxValue = 0;
      index       = 0;
      numValid    = 0;
      std::fill_n(times, MAX_NUM_AVERAGE, 0.0);
    }

    // Stores a sample, replacing the oldest one once the window is full.
    void add(double time)
    {
      // the slot being overwritten leaves the running sum (it is 0 while the window still fills up)
      valueTotal += time - times[index];
      times[index] = time;

      index    = (index + 1) % numCycle;
      numValid = std::min(numValid + 1, numCycle);

      absMinValue = std::min(time, absMinValue);
      absMaxValue = std::max(time, absMaxValue);
    }

    // Mean of the stored samples, 0 if there are none.
    double getAveraged() const { return numValid ? valueTotal / double(numValid) : 0.0; }
  };

  struct Entry
  {
    std::string        name            = {};
    std::string        api             = {};
    gpuTimeProvider_fn gpuTimeProvider = nullptr;

    // level == ~0 used for "singleShot"
    uint32_t level    = 0;
    uint32_t subFrame = 0;

#ifdef NVP_SUPPORTS_NVTOOLSEXT
    nvtxRangeId_t m_nvrange = 0;
#endif

    double cpuTimes[FRAME_DELAY] = {0};
    double gpuTimes[FRAME_DELAY] = {0};

    // number of times summed since last reset
    uint32_t numTimes = 0;

    TimeValues gpuTime;
    TimeValues cpuTime;

    // splitter is used to prevent accumulated case below
    // when same depth level is used
    // {section("BLAH"); ... }
    // splitter
    // {section("BLAH"); ...}
    // now the result of "BLAH" is not accumulated
    bool splitter = false;

    // if the same timer name is used within a loop (same
    // depth level), e.g.:
    //
    // for () { section("BLAH"); ... }
    //
    // we accumulate the timing values of all of them
    bool accumulated = false;
  };

  // Database that may be shared between several profilers (see the master constructor argument).
  struct Data
  {
    uint32_t numAveraging    = MAX_NUM_AVERAGE;
    uint32_t resetDelay      = 0;
    uint32_t numFrames       = 0;
    uint32_t level           = 0;
    uint32_t nextSection     = 0;
    uint32_t numLastSections = 0;
    uint32_t numLastEntries  = 0;

    std::vector<uint32_t> frameSections;
    std::vector<uint32_t> singleSections;

    double     cpuCurrentTime = 0;
    TimeValues cpuTime;

    std::vector<Entry> entries;
  };

  std::shared_ptr<Data> m_data = nullptr;
  Clock                 m_clock;

  SectionID getSectionID(bool singleShot, const char* name);
  bool      getTimerInfo(uint32_t i, TimerInfo& info);
  void      grow(uint32_t newsize);
};
} // namespace nvh
#endif