Commit bc797e4f authored by Jens Korinth

Implement job throughput measurement in benchmark

* measures how many jobs with minimal runtime (1cc) can be scheduled per
  second, using a fixed number of threads
* the benchmark records at least 1-8 threads and keeps adding threads while
  throughput still increases (see the sketch below)
* also improved convergence and lowered the display update rate of the other
  measurements
parent 55ad20bf
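
The measurement added here can be summarized by a short, self-contained sketch (an illustration only, not the TaPaSCo code): a fixed number of worker threads launch back-to-back minimal jobs and bump a shared counter, the main thread derives jobs per second from that counter and the elapsed time, and the thread count keeps growing while throughput improves. launch_job() is a hypothetical stand-in for the real tapasco.launch_no_return() call.

// Sketch of the job throughput measurement; launch_job() is a placeholder.
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

static void launch_job() { /* stand-in for a minimal (1cc) hardware job */ }

static double jobs_per_second(std::size_t num_threads, std::chrono::seconds window) {
  std::atomic<bool> stop { false };
  std::atomic<std::uint64_t> jobs { 0 };
  std::vector<std::thread> workers;
  auto const t_start = std::chrono::steady_clock::now();
  for (std::size_t t = 0; t < num_threads; ++t)
    workers.emplace_back([&]() { while (! stop) { launch_job(); ++jobs; } });
  std::this_thread::sleep_for(window);          // sample for a fixed window
  stop = true;
  for (auto &w : workers) w.join();
  std::chrono::duration<double> const s = std::chrono::steady_clock::now() - t_start;
  return static_cast<double>(jobs.load()) / s.count();
}

int main() {
  // record at least 1-8 threads, keep adding threads while throughput improves
  double prev = -1.0, curr = -1.0;
  for (std::size_t n = 1; n <= 128 && (n <= 8 || curr > prev); ++n) {
    prev = curr;
    curr = jobs_per_second(n, std::chrono::seconds(1));
    std::printf("threads: %3zu  jobs/s: %12.1f\n", n, curr);
  }
  return 0;
}
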
@@ -99,12 +99,12 @@ private:
void trigger(volatile bool& stop, uint32_t const clock_cycles, CumulativeAverage<double>& cavg) {
tapasco_res_t res;
while (! stop) {
- auto tstart = high_resolution_clock::now();
+ auto tstart = steady_clock::now();
// if 0, use 1us - 100ms interval (clock period is 10ns)
uint32_t cc = clock_cycles > 0 ? clock_cycles : (rand() % (10000000 - 100) + 100);
if ((res = tapasco.launch_no_return(COUNTER_ID, cc)) != TAPASCO_SUCCESS)
throw Tapasco::tapasco_error(res);
- microseconds const d = duration_cast<microseconds>(high_resolution_clock::now() - tstart);
+ microseconds const d = duration_cast<microseconds>(steady_clock::now() - tstart);
cavg.update(d.count() - cc / 100);
}
}
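
As an aside on the change above, a minimal, self-contained illustration of the interval-measurement pattern in trigger(): steady_clock is guaranteed to be monotonic, so the measured duration cannot be skewed by wall-clock adjustments, whereas high_resolution_clock may be an alias of the adjustable system_clock. The sleep_for() call below merely stands in for waiting on the launched job.

#include <chrono>
#include <cstdio>
#include <thread>

int main() {
  using namespace std::chrono;
  auto const tstart = steady_clock::now();
  std::this_thread::sleep_for(milliseconds(5));  // stand-in for the launched job
  auto const d = duration_cast<microseconds>(steady_clock::now() - tstart);
  std::printf("elapsed: %lld us\n", static_cast<long long>(d.count()));
  return 0;
}
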
/**
* @file JobThroughput.hpp
* @brief Measures the maximal number of jobs per second.
* Requires counter cores (e.g., precision_counter); launches jobs that
* raise their interrupt after 1cc of runtime and counts the finished
* jobs. Gives a useful upper bound on job throughput in the system.
* The design must run at 100 MHz (assumed by the timing calculation).
* @author J. Korinth, TU Darmstadt (jk@esa.cs.tu-darmstadt.de)
**/
#ifndef JOB_THROUGHPUT_HPP__
#define JOB_THROUGHPUT_HPP__
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <future>
#include <vector>
#include <unistd.h>
#include <ncurses.h>
#include <tapasco_api.hpp>
#include "CumulativeAverage.hpp"
using namespace std;
using namespace std::chrono;
using namespace tapasco;
class JobThroughput {
public:
static tapasco_func_id_t const COUNTER_ID = 14;
JobThroughput(Tapasco& tapasco): tapasco(tapasco), jobs(0) {
if (tapasco.func_instance_count(COUNTER_ID) < 16)
throw "need at least 16 instances of 'Counter' (14) in bitstream";
}
virtual ~JobThroughput() {}
double operator()(size_t const num_threads) {
CumulativeAverage<double> cavg { 0 };
jobs.store(0U);
bool stop = false;
initscr(); noecho(); curs_set(0); timeout(0);
int x, y;
getyx(stdscr, y, x);
vector<future<void> > threads;
auto const t_start = steady_clock::now();
for (size_t t = 0; t < num_threads; ++t)
threads.push_back(async(launch::async, [&]() { run(stop, jobs); }));
do {
move(y, 0);
clrtoeol();
mvprintw(y, x, "Num threads: % 2zu, jobs/second: % 12.1f, max: % 12.1f, min: % 12.1f",
num_threads, cavg(), cavg.max(), cavg.min());
refresh();
usleep(5000000);
auto const j = jobs.load();
auto const t = steady_clock::now();
auto const s = duration_cast<seconds>(t - t_start);
auto const v = s.count() > 0 ? static_cast<double>(j) / static_cast<double>(s.count()) : 0.0;
if (v > 10.0) cavg.update(v);
} while(getch() == ERR && (fabs(cavg.delta()) > 10.0 || cavg.size() < 5));
stop = true;
for (auto &f : threads)
f.get();
move(y+1, 0);
endwin();
return cavg();
}
private:
void run(volatile bool& stop, atomic<uint64_t>& count) {
tapasco_res_t res;
while (! stop) {
if ((res = tapasco.launch_no_return(COUNTER_ID, 1U)) != TAPASCO_SUCCESS)
throw Tapasco::tapasco_error(res);
++count;
}
}
Tapasco& tapasco;
atomic<uint64_t> jobs { 0 };
};
#endif /* JOB_THROUGHPUT_HPP__ */
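
The convergence test in operator() relies on CumulativeAverage (from CumulativeAverage.hpp), which is not part of this commit. The following is only a minimal sketch consistent with how the class is used above, assuming update() returns the change of the running mean so callers can compare fabs(update(...)) or delta() against a threshold; the project's actual implementation may differ.

#include <algorithm>
#include <cstddef>
#include <limits>

template <typename T>
class CumulativeAverage {
public:
  explicit CumulativeAverage(T const init) : avg(init) {}
  // add a sample; returns the change of the running mean (used for convergence tests)
  T update(T const v) {
    T const prev = avg;
    avg = (avg * static_cast<T>(n) + v) / static_cast<T>(n + 1);
    ++n;
    mn = std::min(mn, v);
    mx = std::max(mx, v);
    d = avg - prev;
    return d;
  }
  T operator()() const { return avg; }    // current running mean
  T delta()      const { return d; }      // change caused by the last update
  T min()        const { return mn; }
  T max()        const { return mx; }
  std::size_t size() const { return n; }  // number of samples so far
private:
  T avg;
  T d  { 0 };
  T mn { std::numeric_limits<T>::max() };
  T mx { std::numeric_limits<T>::lowest() };
  std::size_t n { 0 };
};

With such an interface, the sampling loop stops once enough samples have been collected (size()) and the mean has essentially stopped moving (delta()).
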
@@ -40,18 +40,18 @@ public:
initscr(); noecho(); curs_set(0); timeout(0);
int x, y;
getyx(stdscr, y, x);
- auto tstart = high_resolution_clock::now();
+ auto tstart = steady_clock::now();
double b = 0.0;
- duration<double> d = high_resolution_clock::now() - tstart;
+ duration<double> d = steady_clock::now() - tstart;
future<void> f = async(launch::async, [&]() { transfer(stop, chunk_sz, opmask); });
do {
b = bytes.load() / (1024.0 * 1024.0);
- d = high_resolution_clock::now() - tstart;
+ d = steady_clock::now() - tstart;
mvprintw(y, x, "Chunk size: %8.2f KiB, Mask: %s, Speed: %8.2f MiB/s",
cs, ms.c_str(), cavg());
refresh();
- usleep(1000);
- } while (getch() == ERR && (fabs(cavg.update(b / d.count())) > 0.01 || cavg.size() < 10000));
+ usleep(1000000);
+ } while (getch() == ERR && (fabs(cavg.update(b / d.count())) > 0.1 || cavg.size() < 30));
stop = true;
f.get();
move(y+1, 0);
@@ -18,6 +18,7 @@
#include "CumulativeAverage.hpp"
#include "TransferSpeed.hpp"
#include "InterruptLatency.hpp"
#include "JobThroughput.hpp"
#include "json11.hpp"
using namespace std;
@@ -50,16 +51,28 @@ struct interrupt_latency_t {
}; }
};
struct job_throughput_t {
size_t num_threads;
double jobs_per_sec;
Json to_json() const { return Json::object {
{"Number of threads", static_cast<double>(num_threads)},
{"Jobs per second", jobs_per_sec}
}; }
};
int main(int argc, const char *argv[]) {
Tapasco tapasco;
TransferSpeed tp { tapasco };
InterruptLatency il { tapasco };
JobThroughput jt { tapasco };
struct utsname uts;
uname(&uts);
vector<Json> speed;
struct transfer_speed_t ts;
vector<Json> latency;
struct interrupt_latency_t ls;
vector<Json> jobs;
struct job_throughput_t js;
string platform = "vc709";
if (argc < 2) {
@@ -70,17 +83,17 @@ int main(int argc, const char *argv[]) {
cerr << "Could not get host name, guessing vc709 Platform" << endl;
else {
cout << "Host name: " << n << endl;
platform = n;
if (string(n).compare("zed") == 0 || string(n).compare("zedboard") == 0)
platform = "zedboard";
if (string(n).compare("zc706") == 0)
platform = "zc706";
cout << "Guessing " << platform << " Platform" << endl;
}
} else platform = getenv("TAPASCO_PLATFORM");
}
- // measure for chunk sizes 2^8 - 2^31 (2GB) bytes
+ // measure for chunk sizes 2^10 (1KiB) - 2^31 (2GB) bytes
for (int i = 10; i < 32; ++i) {
ts.chunk_sz = 1 << i;
ts.speed_r = tp(ts.chunk_sz, TransferSpeed::OP_COPYFROM);
@@ -107,6 +120,17 @@ int main(int argc, const char *argv[]) {
latency.push_back(json);
}
size_t i = 1;
double prev = -1;
js.jobs_per_sec = -1;
do {
prev = js.jobs_per_sec;
js.num_threads = i;
js.jobs_per_sec = jt(i);
++i;
jobs.push_back(js.to_json());
} while (i <= 128 && (i <= 8 || js.jobs_per_sec > prev));
// record current time
time_t tt = chrono::system_clock::to_time_t(chrono::system_clock::now());
tm tm = *localtime(&tt);
@@ -118,14 +142,15 @@ int main(int argc, const char *argv[]) {
{"Timestamp", str.str()},
{"Host", Json::object {
{"Operating System", uts.sysname},
{"Node", uts.nodename},
{"Release", uts.release},
{"Version", uts.version},
{"Machine", uts.machine}
{"Node", uts.nodename},
{"Release", uts.release},
{"Version", uts.version},
{"Machine", uts.machine}
}
},
{"Transfer Speed", speed},
{"Interrupt Latency", latency},
{"Job Throughput", jobs},
{"Library Versions", Json::object {
{"Tapasco API", tapasco::tapasco_version()},
{"Platform API", platform::platform_version()}