aboutsummaryrefslogtreecommitdiff
path: root/demo/benchmark.h
diff options
context:
space:
mode:
Diffstat (limited to 'demo/benchmark.h')
-rw-r--r-- demo/benchmark.h 444
1 files changed, 340 insertions, 104 deletions
diff --git a/demo/benchmark.h b/demo/benchmark.h
index c0ab7c2..d04e780 100644
--- a/demo/benchmark.h
+++ b/demo/benchmark.h
@@ -27,166 +27,402 @@
#pragma once
+
+
#include <iomanip>
+#include <algorithm>
+#include <stdint.h>
const char* g_benchmarkFilename = "../../benchmark.txt";
-std::ofstream g_benchmarkFile;
+std::wofstream g_benchmarkFile; // wide stream that WriteSceneResults() opens in append mode on g_benchmarkFilename
-// returns the new scene if one is requested
-int BenchmarkUpdate()
+const int benchmarkPhaseFrameCount = 400; // number of frames in each of the three benchmark phases
+const int benchmarkEndWarmup = 200; // frame offset within a phase at which totals start being accumulated
+
+const int benchmarkAsyncOffDummyOnBeginFrame = benchmarkPhaseFrameCount; // first frame of phase 1 (async compute off, extra graphics load on)
+const int benchmarkAsyncOnDummyOnBeginFrame = benchmarkPhaseFrameCount*2; // first frame of phase 2 (async compute on, extra graphics load on)
+const int benchmarkEndFrame = benchmarkPhaseFrameCount*3; // frame count at which a scene's results are written
+const char* benchmarkList[] = { "Env Cloth Small", "Viscosity Med", "Inflatables", "Game Mesh Particles", "Rigid4" }; // scene names, matched against g_scenes[i]->GetName()
+const char* benchmarkChartPrefix[] = { "EnvClothSmall", "ViscosityMed", "Inflatables", "GameMeshParticles", "Rigid4" }; // TeamCity statistic key prefix for each benchmarkList entry; must contain no spaces
+int numBenchmarks = sizeof(benchmarkList)/sizeof(benchmarkList[0]); // number of entries in benchmarkList
+
+struct GpuTimers // raw GPU timestamps plus the per-frame values ProcessGpuTimes() derives from them
+{
+ unsigned long long renderBegin; // render start timestamp, in ticks
+ unsigned long long renderEnd; // render end timestamp, in ticks
+ unsigned long long renderFreq; // divisor that converts render ticks to time (presumably ticks per second - confirm with the code that fills it)
+ unsigned long long computeBegin; // compute start timestamp, in ticks
+ unsigned long long computeEnd; // compute end timestamp, in ticks
+ unsigned long long computeFreq; // divisor that converts compute ticks to time (presumably ticks per second - confirm)
+
+ static const int maxTimerCount = 4; // number of value slots stored per frame
+ double timers[benchmarkEndFrame][maxTimerCount]; // [frame][slot]: slot 0 = active particle count, slots 1-3 = timings scaled by 1000 (see ProcessGpuTimes)
+ int timerCount[benchmarkEndFrame]; // number of slots ProcessGpuTimes() filled for each frame
+};
+
+
+struct TimerTotals // running sums collected by UpdateTotals() for the scene currently being benchmarked
{
- // Enable console benchmark profiling
- static NvFlexTimers sTimersSum;
- static std::vector<NvFlexDetailTimer> sDTimersSum;
- static float sTotalFrameTime = 0.0f;
- static int sSamples = 0;
+ std::vector<NvFlexDetailTimer> detailTimers; // per-kernel name and summed time; WriteSceneResults() turns the sums into averages
+
+ float frameTime; // sum of g_realdt * 1000 over the measured frames of phase 0
+ int samples; // number of frames added to frameTime
- static int benchmarkIter = 0;
- const int numBenchmarks = 5;
- const char* benchmarkList[numBenchmarks] = { "Env Cloth Small", "Viscosity Med", "Inflatables", "Game Mesh Particles", "Rigid4" };
- const char* benchmarkChartPrefix[numBenchmarks] = { "EnvClothSmall", "ViscosityMed", "Inflatables", "GameMeshParticles", "Rigid4" }; //no spaces
- //float benchmarkEnergyCheck[numBenchmarks] = { 6000, 1000, 1000, 150426, 63710 };
+ float frameTimeAsync; // sum of g_realdt * 1000 over the measured frames of phase 2 (async compute on)
+ int samplesAsync; // number of frames added to frameTimeAsync
- int newScene = -1;
+ float computeTimeAsyncOff; // sum of the compute time recorded for the matching phase-1 frames
+ float computeTimeAsyncOn; // sum of (phase-2 union time - phase-1 render time), the estimated compute cost with async on
+ int computeSamples; // number of frames added to the two compute sums
+
+ TimerTotals() : frameTime(0), samples(0), frameTimeAsync(0), samplesAsync(0), computeTimeAsyncOff(0), computeTimeAsyncOn(0), computeSamples(0) {} // start every sum and counter at zero
+};
+
+GpuTimers g_GpuTimers; // timestamps are read, and per-frame values stored, by ProcessGpuTimes()
+
+int g_benchmarkFrame = 0; // frame counter within the current scene's run; advanced and reset by BenchmarkUpdate()
+int g_benchmarkScene = 0; // index into benchmarkList of the scene being benchmarked
+int g_benchmarkSceneNumber; // index into g_scenes of that scene; set by GoToNextScene()
+
+#if defined(__linux__)
+// Linux stand-in for the MSVC-only sprintf_s.
+//
+// buffer:      destination for the formatted, NUL-terminated text
+// bufferCount: capacity of buffer in bytes, including the terminator
+// format, ...: printf-style format string and arguments
+//
+// Returns the vsnprintf result: the number of characters written (excluding the
+// terminator) when the text fits, the length it would have had when the output
+// was truncated to bufferCount - 1 characters, or a negative value on error.
+int sprintf_s(char* const buffer, size_t const bufferCount,
+ const char* format,...)
+{
+ va_list args;
+ va_start(args, format);
+ // vsnprintf honours bufferCount; the vsprintf call used before ignored it and
+ // wrote past the end of buffer whenever the formatted text was too long.
+ int retval = vsnprintf(buffer, bufferCount, format, args);
+ va_end(args);
+
+ return retval;
+}
+#endif
+
+//-----------------------------------------------------------------------------
+char* removeSpaces(const char* in) /* Returns a newly allocated, NUL-terminated copy of
+ `in` with every ' ' character removed. The buffer comes from new[] and nothing in this
+ function releases it, so the caller presumably has to delete[] it - confirm at call sites. */
+{
+ int len = strlen(in); // NOTE(review): size_t result narrowed to int
+ char* out = new char[len+1]; // worst case: nothing is removed, plus the terminator
- if (g_benchmark && benchmarkIter == 0 && g_frame == 1)
+ int i = 0; // read position in `in`
+ int j = 0; // write position in `out`
+ while (in[i] != 0)
{
- // check and see if the first scene is the same as the first benchmark
- // switch to benchmark if it is not the same
- if (strcmp(benchmarkList[0], g_scenes[g_scene]->GetName()) == 0)
- benchmarkIter++;
- else
- g_frame = -1;
+ if (in[i] != ' ') // only the plain space character is dropped
+ {
+ out[j] = in[i];
+ j++;
+ }
+ i++;
}
+ out[j] = 0; // terminate the compacted copy
+
+ return out;
+}
+//-----------------------------------------------------------------------------
+void ProcessGpuTimes() /* Converts the raw timestamps held in g_GpuTimers into durations
+ and stores them, together with the active particle count and g_realdt, in
+ g_GpuTimers.timers[g_benchmarkFrame]. The very first call stores nothing and only
+ clears timerfirstTime. */
+{
+ static bool timerfirstTime = true; // true only until the first call has completed
+
+ double renderTime;
+ double compTime;
+ double unionTime;
+ double overlapBeginTime;
+
+ int numParticles = NvFlexGetActiveCount(g_solver); // recorded in slot 0 next to the timings
- if (g_frame == 200)
+ renderTime = double(g_GpuTimers.renderEnd - g_GpuTimers.renderBegin) / double(g_GpuTimers.renderFreq); // render duration: tick delta divided by renderFreq
+ compTime = double(g_GpuTimers.computeEnd - g_GpuTimers.computeBegin) / double(g_GpuTimers.computeFreq); // compute duration: tick delta divided by computeFreq
+
+ uint64_t minTime = min(g_GpuTimers.renderBegin, g_GpuTimers.computeBegin); // NOTE(review): unqualified min/max rely on a macro or using-declaration outside this view - confirm for non-Windows builds
+ uint64_t maxTime = max(g_GpuTimers.renderEnd, g_GpuTimers.computeEnd);
+ unionTime = double(maxTime - minTime) / double(g_GpuTimers.computeFreq); // span from the earliest begin to the latest end; NOTE(review): mixes render and compute ticks but divides by computeFreq only - assumes both share a frequency
+
+ overlapBeginTime = abs((long long)g_GpuTimers.renderBegin - (long long)g_GpuTimers.computeBegin) / double(g_GpuTimers.computeFreq); // absolute offset between the render and compute start timestamps
+
+ if (!timerfirstTime && g_benchmarkFrame < benchmarkEndFrame) // skip the first call and any frame index past the end of timers[]
{
- memset(&sTimersSum, 0, sizeof(NvFlexTimers));
- sTotalFrameTime = 0.0f;
- sSamples = 0;
- g_emit = true;
- sDTimersSum.resize(g_numDetailTimers);
+ if (g_useAsyncCompute) // async on: slots 1 and 2 hold the union span and the begin offset
+ {
+ g_GpuTimers.timers[g_benchmarkFrame][0] = numParticles;
+ g_GpuTimers.timers[g_benchmarkFrame][1] = unionTime * 1000;
+ g_GpuTimers.timers[g_benchmarkFrame][2] = overlapBeginTime * 1000;
+ g_GpuTimers.timers[g_benchmarkFrame][3] = g_realdt * 1000;
+ g_GpuTimers.timerCount[g_benchmarkFrame] = 4;
+ }
+ else // async off: slots 1 and 2 hold the separate render and compute durations
+ {
+ g_GpuTimers.timers[g_benchmarkFrame][0] = numParticles;
+ g_GpuTimers.timers[g_benchmarkFrame][1] = renderTime * 1000;
+ g_GpuTimers.timers[g_benchmarkFrame][2] = compTime * 1000;
+ g_GpuTimers.timers[g_benchmarkFrame][3] = g_realdt * 1000;
+ g_GpuTimers.timerCount[g_benchmarkFrame] = 4;
+ }
}
- if (g_frame >= 200 && g_frame < 400)
+ timerfirstTime = false;
+}
+//-----------------------------------------------------------------------------
+void UpdateTotals(TimerTotals& totals) /* Adds the current frame's measurements to `totals`
+ when g_benchmarkFrame lies in one of the two measured windows: phase 0 after warm-up,
+ or phase 2 after warm-up. Frames outside both windows leave `totals` untouched. */
+{
+ // Phase 0B, async off, dummy work off
+ if (benchmarkEndWarmup <= g_benchmarkFrame && g_benchmarkFrame < benchmarkAsyncOffDummyOnBeginFrame)
{
- sTotalFrameTime += g_realdt * 1000.0f; //convert to milliseconds
+ totals.frameTime += g_realdt * 1000.0f; //convert to milliseconds
for (int i = 0; i < g_numDetailTimers; i++) {
- sDTimersSum[i].name = g_detailTimers[i].name;
- sDTimersSum[i].time += g_detailTimers[i].time;
+ strcpy(totals.detailTimers[i].name,g_detailTimers[i].name); // copies into the buffer allocated by BeginNewPhaseIfNecessary(); NOTE(review): unbounded copy into 256 bytes
+ totals.detailTimers[i].time += g_detailTimers[i].time;
}
- sTimersSum.total += g_timers.total;
+ totals.samples++;
+ }
- sSamples++;
+ // Phase 2B, async on, dummy work on
+ if (benchmarkAsyncOnDummyOnBeginFrame + benchmarkEndWarmup <= g_benchmarkFrame)
+ {
+ float offGraphics = (float)g_GpuTimers.timers[g_benchmarkFrame - benchmarkPhaseFrameCount][1]; // render time of the frame one phase earlier (phase 1, async off)
+ float offCompute = (float)g_GpuTimers.timers[g_benchmarkFrame - benchmarkPhaseFrameCount][2]; // compute time of that same phase-1 frame
+ float onBoth = (float)g_GpuTimers.timers[g_benchmarkFrame][1]; // union span of render and compute for this async-on frame
+
+ float onCompute = onBoth - offGraphics; // time the async-on frame spent beyond the phase-1 render time
+
+ totals.computeTimeAsyncOff += offCompute;
+ totals.computeTimeAsyncOn += onCompute;
+ totals.computeSamples++;
+
+ totals.frameTimeAsync += g_realdt * 1000.0f; //convert to milliseconds
+ totals.samplesAsync++;
}
- if (g_frame == 400)
+}
+//-----------------------------------------------------------------------------
+// Performs the one-off setup required when g_benchmarkFrame has just reached the
+// first frame of a benchmark phase or measurement window.
+//
+// sceneToSwitchTo: set to g_benchmarkSceneNumber when phase 1 or phase 2 begins,
+//                  otherwise left untouched.
+// totals:          accumulators that are reset at the start of each measurement window.
+void BeginNewPhaseIfNecessary(int& sceneToSwitchTo,TimerTotals& totals)
+{
+ // Are we beginning phase 0B?
+ if (g_benchmarkFrame == benchmarkEndWarmup)
{
+ totals.frameTime = 0.0f;
+ totals.samples = 0;
+ g_emit = true;
+ totals.detailTimers.resize(g_numDetailTimers);
- for (int i = 0; i < g_numDetailTimers; i++) {
- sDTimersSum[i].time /= sSamples;
+ for (int i = 0; i != g_numDetailTimers; i++)
+ {
+ totals.detailTimers[i].name = new char[256];
+ // Start from an empty string so the buffer is never read uninitialised.
+ totals.detailTimers[i].name[0] = 0;
+ // resize() keeps existing elements, so without this reset the previous scene's
+ // averaged time would be carried into this scene's sum.
+ totals.detailTimers[i].time = 0.0f;
}
+ }
- if (g_teamCity)
- {
- const char* prefix = benchmarkChartPrefix[benchmarkIter - 1];
+ // Are we beginning phase 1?
+ if (g_benchmarkFrame == benchmarkAsyncOffDummyOnBeginFrame)
+ {
+ sceneToSwitchTo = g_benchmarkSceneNumber;
+ g_useAsyncCompute = false;
+ g_increaseGfxLoadForAsyncComputeTesting = true;
+ }
- float exclusive = 0.0f;
+ // Are we beginning phase 2?
+ if (g_benchmarkFrame == benchmarkAsyncOnDummyOnBeginFrame)
+ {
+ sceneToSwitchTo = g_benchmarkSceneNumber;
+ g_useAsyncCompute = true;
+ g_increaseGfxLoadForAsyncComputeTesting = true;
+ }
- for (int i = 0; i < g_numDetailTimers - 1; i++) {
- exclusive += sDTimersSum[i].time;
- }
+ // Are we beginning phase 2B?
+ if (g_benchmarkFrame == benchmarkAsyncOnDummyOnBeginFrame + benchmarkEndWarmup)
+ {
+ totals.frameTimeAsync = 0.0f;
+ totals.samplesAsync = 0;
+ totals.computeTimeAsyncOff = 0.0f;
+ totals.computeTimeAsyncOn = 0.0f;
+ totals.computeSamples = 0;
+ g_emit = true;
+ }
+}
+//-----------------------------------------------------------------------------
+// Reports the results of the scene that has just finished: averages the detail
+// timers, prints TeamCity statistics (when g_profile && g_teamCity) and a plain
+// report to stdout, appends the report to g_benchmarkFilename, and releases the
+// timer-name buffers.
+//
+// totals: values accumulated for the scene. detailTimers[i].time is converted in
+//         place from a sum to a per-sample average, and every detailTimers[i].name
+//         is freed and set to NULL before returning.
+void WriteSceneResults(TimerTotals& totals)
+{
+ // Write results for scene
+ for (int i = 0; i < g_numDetailTimers; i++) {
+ totals.detailTimers[i].time /= totals.samples;
+ }
- printf("##teamcity[buildStatisticValue key='%s_FrameTime' value='%f']\n", prefix, sTotalFrameTime / sSamples);
- printf("##teamcity[buildStatisticValue key='%s_SumKernel' value='%f']\n", prefix, exclusive);
+ if (g_profile && g_teamCity)
+ {
+ // BenchmarkUpdate() increments g_benchmarkScene only after this function returns,
+ // so it still indexes the scene being reported; the former "- 1" read element -1
+ // for the first scene and the wrong prefix for every later one.
+ const char* prefix = benchmarkChartPrefix[g_benchmarkScene];
- for (int i = 0; i < g_numDetailTimers - 1; i++) {
- printf("##teamcity[buildStatisticValue key='%s_%s' value='%f']\n", prefix, sDTimersSum[i].name, sDTimersSum[i].time);
- }
- printf("\n");
+ float exclusive = 0.0f;
+
+ for (int i = 0; i < g_numDetailTimers - 1; i++) {
+ exclusive += totals.detailTimers[i].time;
}
- else
- {
- printf("Scene: %s\n", g_scenes[g_scene]->GetName());
- printf("FrameTime %f\n", sTotalFrameTime / sSamples);
- printf("________________________________\n");
- float exclusive = 0.0f;
-
- for (int i = 0; i < g_numDetailTimers-1; i++) {
- exclusive += sDTimersSum[i].time;
- printf("%s %f\n", sDTimersSum[i].name, sDTimersSum[i].time);
- }
- printf("Sum(exclusive) %f\n", exclusive);
- printf("Sum(inclusive) %f\n", sDTimersSum[g_numDetailTimers - 1].time);
- printf("________________________________\n");
+
+ printf("##teamcity[buildStatisticValue key='%s_FrameTime' value='%f']\n", prefix, totals.frameTime / totals.samples);
+ printf("##teamcity[buildStatisticValue key='%s_SumKernel' value='%f']\n", prefix, exclusive);
+
+ for (int i = 0; i < g_numDetailTimers - 1; i++) {
+ printf("##teamcity[buildStatisticValue key='%s_%s' value='%f']\n", prefix, totals.detailTimers[i].name, totals.detailTimers[i].time);
}
+ printf("\n");
+ }
- // Dumping benchmark data to txt files
+ printf("Scene: %s\n", g_scenes[g_scene]->GetName());
+ printf("FrameTime %f\n", totals.frameTime / totals.samples);
+ printf("________________________________\n");
+ float exclusive = 0.0f;
- g_benchmarkFile.open(g_benchmarkFilename, std::ofstream::out | std::ofstream::app);
- g_benchmarkFile << std::fixed << std::setprecision(6);
- g_benchmarkFile << "Scene: " << g_scenes[g_scene]->GetName() << std::endl;
- g_benchmarkFile << "FrameTime " << sTotalFrameTime / sSamples << std::endl;
- g_benchmarkFile << "________________________________" << std::endl;
+ for (int i = 0; i < g_numDetailTimers - 1; i++) {
+ exclusive += totals.detailTimers[i].time;
+ printf("%s %f\n", totals.detailTimers[i].name, totals.detailTimers[i].time);
+ }
+ printf("Sum(exclusive) %f\n", exclusive);
+ printf("Sum(inclusive) %f\n", totals.detailTimers[g_numDetailTimers - 1].time);
+ printf("________________________________\n");
+
+ // Dumping benchmark data to txt files
+
+ g_benchmarkFile.open(g_benchmarkFilename, std::ofstream::out | std::ofstream::app);
+ g_benchmarkFile << std::fixed << std::setprecision(6);
+ g_benchmarkFile << "Scene: " << g_scenes[g_scene]->GetName() << std::endl;
+ g_benchmarkFile << "FrameTime " << totals.frameTime / totals.samples << std::endl;
+ g_benchmarkFile << "________________________________" << std::endl;
+
+ if (g_profile)
+ {
float exclusive = 0.0f;
+ g_benchmarkFile << std::fixed << std::setprecision(6);
+
for (int i = 0; i < g_numDetailTimers - 1; i++) {
- exclusive += sDTimersSum[i].time;
- g_benchmarkFile << sDTimersSum[i].name<<" "<< sDTimersSum[i].time << std::endl;
+ exclusive += totals.detailTimers[i].time;
+ g_benchmarkFile << totals.detailTimers[i].name << " " << totals.detailTimers[i].time << std::endl;
}
- g_benchmarkFile << "Sum(exclusive) "<< exclusive << std::endl;
- g_benchmarkFile << "Sum(inclusive) "<< sDTimersSum[g_numDetailTimers - 1].time<< std::endl;
+ g_benchmarkFile << "Sum(exclusive) " << exclusive << std::endl;
+ g_benchmarkFile << "Sum(inclusive) " << totals.detailTimers[g_numDetailTimers - 1].time << std::endl;
g_benchmarkFile << "________________________________" << std::endl << std::endl;
- g_benchmarkFile.close();
+ }
+
+ // Release every name buffer allocated by BeginNewPhaseIfNecessary(). They come from
+ // new char[256], so delete[] is required. Doing it here, after the last use of the
+ // names, also covers the cases the former in-loop delete missed: g_profile being
+ // off, and the final (inclusive) timer.
+ for (int i = 0; i < g_numDetailTimers; i++) {
+ delete[] totals.detailTimers[i].name;
+ totals.detailTimers[i].name = NULL;
+ }
- if (g_benchmark)
+ if (g_outputAllFrameTimes)
+ {
+ // Slot 3 (g_realdt * 1000) for every frame of the scene, one value per line
+ for (int i = 0; i != benchmarkEndFrame; i++)
{
+ g_benchmarkFile << g_GpuTimers.timers[i][3] << std::endl;
+ }
-#if 0
- // Do basic kinetic energy verification check to ensure that the benchmark runs correctly
- NvFlexGetVelocities(g_flex, g_buffers->velocities.buffer, g_buffers->velocities.size());
+ // Per frame timers: each phase-1 frame followed on the same line by the phase-2
+ // frame at the same offset
+ for (int i = benchmarkAsyncOffDummyOnBeginFrame; i != benchmarkAsyncOnDummyOnBeginFrame; i++)
+ {
+ for (int j = 0; j != g_GpuTimers.timerCount[i]; j++)
+ {
+ g_benchmarkFile << g_GpuTimers.timers[i][j] << " ";
+ }
- float sumVelocities = 0.0f;
- for (int i = 0; i < g_buffers->velocities.size(); ++i)
+ for (int j = 0; j != g_GpuTimers.timerCount[i + benchmarkPhaseFrameCount]; j++)
{
- sumVelocities += g_buffers->velocities[i].x * g_buffers->velocities[i].x + g_buffers->velocities[i].y * g_buffers->velocities[i].y + g_buffers->velocities[i].z * g_buffers->velocities[i].z;
+ g_benchmarkFile << g_GpuTimers.timers[i + benchmarkPhaseFrameCount][j] << " ";
}
- // Tolerance 50%
- int benchmark_id = benchmarkIter - 1;
- if (sumVelocities > (benchmarkEnergyCheck[benchmark_id] * 1.50) ||
- sumVelocities < (benchmarkEnergyCheck[benchmark_id] * 0.50))
- printf("Benchmark kinetic energy verification failed! Expected: [%f], Actual: [%f]\n\n", benchmarkEnergyCheck[benchmark_id], sumVelocities);
-#endif
- g_frame = -1;
+ g_benchmarkFile << std::endl;
}
+
}
- if (g_benchmark && g_frame == -1)
+ g_benchmarkFile.close();
+
+ if (g_benchmark)
{
- if (benchmarkIter == numBenchmarks)
- exit(0);
- for (int i = 0; i < int(g_scenes.size()); ++i)
+#if 0
+ // Do basic kinetic energy verification check to ensure that the benchmark runs correctly
+ NvFlexGetVelocities(g_flex, g_buffers->velocities.buffer, g_buffers->velocities.size());
+
+ float sumVelocities = 0.0f;
+ for (int i = 0; i < g_buffers->velocities.size(); ++i)
{
- if (strcmp(benchmarkList[benchmarkIter], g_scenes[i]->GetName()) == 0)
- newScene = i;
+ sumVelocities += g_buffers->velocities[i].x * g_buffers->velocities[i].x + g_buffers->velocities[i].y * g_buffers->velocities[i].y + g_buffers->velocities[i].z * g_buffers->velocities[i].z;
}
- assert(newScene != -1);
+ // Tolerance 50%
+ // Same indexing as the chart prefix above: g_benchmarkScene is still the current scene.
+ int benchmark_id = g_benchmarkScene;
+ if (sumVelocities >(benchmarkEnergyCheck[benchmark_id] * 1.50) ||
+ sumVelocities < (benchmarkEnergyCheck[benchmark_id] * 0.50))
+ printf("Benchmark kinetic energy verification failed! Expected: [%f], Actual: [%f]\n\n", benchmarkEnergyCheck[benchmark_id], sumVelocities);
+#endif
- benchmarkIter++;
}
-
- return newScene;
}
-
-void BenchmarkInit()
+//-----------------------------------------------------------------------------
+int GoToNextScene() /* Finds the scene in g_scenes whose name equals
+ benchmarkList[g_benchmarkScene], stores its index in g_benchmarkSceneNumber, and turns
+ async compute and the extra graphics load off. Returns that g_scenes index; asserts
+ when no scene carries the name. */
{
- g_benchmarkFile.open(g_benchmarkFilename, std::ofstream::out | std::ofstream::app);
- g_benchmarkFile << "Compute Device: " << g_deviceName << std::endl;
- g_benchmarkFile << "HLSL Extensions: " << (g_extensions ? "ON" : "OFF") << std::endl << std::endl;
- g_benchmarkFile.close();
+ int sceneToSwitchTo = -1;
+
+ // Advance to next benchmark scene
+ for (int i = 0; i < int(g_scenes.size()); ++i) // no break: if several scenes share the name, the last match wins
+ {
+ if (strcmp(benchmarkList[g_benchmarkScene], g_scenes[i]->GetName()) == 0)
+ {
+ sceneToSwitchTo = i;
+ g_benchmarkSceneNumber = i;
+ }
+ }
+ assert(sceneToSwitchTo != -1);
+
+ g_useAsyncCompute = false; // every scene starts in phase 0: async off
+ g_increaseGfxLoadForAsyncComputeTesting = false; // ...and without the extra graphics load
+
+ return sceneToSwitchTo;
}
+//-----------------------------------------------------------------------------
+// Per-frame benchmark driver. Records the frame's timings, advances the frame
+// counter, starts a new phase when one is due, and on the last frame of a scene
+// writes the results and moves to the next scene (or exits once all are done).
+// Returns the index of the scene to switch to, or -1 when no switch is wanted.
+//-----------------------------------------------------------------------------
+int BenchmarkUpdate()
+{
+ static TimerTotals s_totals;
+
+ // Nothing to do unless benchmarking is enabled
+ if (!g_benchmark)
+ {
+ return -1;
+ }
+
+ ProcessGpuTimes();
+ UpdateTotals(s_totals);
+
+ // Advance to the next frame before checking for phase boundaries
+ ++g_benchmarkFrame;
+
+ int requestedScene = -1;
+ BeginNewPhaseIfNecessary(requestedScene, s_totals);
+
+ // Still inside the current scene: report whatever the phase change asked for
+ if (g_benchmarkFrame != benchmarkEndFrame)
+ {
+ return requestedScene;
+ }
+
+ // The scene has finished: report it and move on
+ WriteSceneResults(s_totals);
+ ++g_benchmarkScene;
+
+ // All benchmark scenes done
+ if (g_benchmarkScene == numBenchmarks)
+ {
+ exit(0);
+ }
+
+ requestedScene = GoToNextScene();
+ g_benchmarkFrame = 0;
+ g_frame = -1;
+
+ return requestedScene;
+}
+//-----------------------------------------------------------------------------
+// Selects the first benchmark scene. Returns the g_scenes index reported by
+// GoToNextScene().
+int BenchmarkInit()
+{
+ return GoToNextScene();
+}
+//-----------------------------------------------------------------------------
+void BenchmarkUpdateGraph() // intentionally empty: currently performs no work
+{
+}
+//-----------------------------------------------------------------------------