Refactor benchmarks using Google Benchmark

This commit is contained in:
Bram Veenboer
2025-08-12 13:16:59 +02:00
parent e9a74ef283
commit 5e7aca89bb
8 changed files with 195 additions and 81 deletions

View File

@@ -1,23 +1,33 @@
# Fetch Google Benchmark, pinned to tag v1.9.4.
# NOTE(review): the FetchContent_* commands need include(FetchContent); it is
# not part of the lines shown here - presumably a parent CMakeLists.txt
# includes it; confirm.
FetchContent_Declare(
benchmark
GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG v1.9.4)
# Force BENCHMARK_ENABLE_TESTING to OFF in the cache before the dependency is
# made available.
set(BENCHMARK_ENABLE_TESTING
OFF
CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(benchmark)
# One benchmark executable per backend.
# NOTE(review): every target below carries two target_link_libraries lines,
# one without and one with benchmark::benchmark. This looks like the before and
# after versions of the same line from a diff whose markers were lost; confirm
# that only the benchmark::benchmark variant is meant to remain.
add_executable(benchmark_reference benchmark_reference.cpp)
target_link_libraries(benchmark_reference PRIVATE trigdx)
target_link_libraries(benchmark_reference PRIVATE trigdx benchmark::benchmark)
add_executable(benchmark_lookup benchmark_lookup.cpp)
target_link_libraries(benchmark_lookup PRIVATE trigdx)
target_link_libraries(benchmark_lookup PRIVATE trigdx benchmark::benchmark)
add_executable(benchmark_lookup_avx benchmark_lookup_avx.cpp)
target_link_libraries(benchmark_lookup_avx PRIVATE trigdx)
target_link_libraries(benchmark_lookup_avx PRIVATE trigdx benchmark::benchmark)
# The remaining benchmarks are only built when the matching TRIGDX_USE_*
# option is enabled.
if(TRIGDX_USE_MKL)
add_executable(benchmark_mkl benchmark_mkl.cpp)
target_link_libraries(benchmark_mkl PRIVATE trigdx)
target_link_libraries(benchmark_mkl PRIVATE trigdx benchmark::benchmark)
endif()
if(TRIGDX_USE_GPU)
# The GPU benchmark additionally links the gpu target.
add_executable(benchmark_gpu benchmark_gpu.cpp)
target_link_libraries(benchmark_gpu PRIVATE trigdx gpu)
target_link_libraries(benchmark_gpu PRIVATE trigdx gpu benchmark::benchmark)
endif()
if(TRIGDX_USE_XSIMD)
add_executable(benchmark_lookup_xsimd benchmark_lookup_xsimd.cpp)
target_link_libraries(benchmark_lookup_xsimd PRIVATE trigdx)
target_link_libraries(benchmark_lookup_xsimd PRIVATE trigdx
benchmark::benchmark)
endif()

View File

@@ -2,8 +2,20 @@
#include "benchmark_utils.hpp"
// Runs the sinf, cosf and sincosf benchmarks for GPUBackend once each, in
// that order, using the zero-argument benchmark_* templates.
// NOTE(review): BENCHMARK_MAIN() further down in this file also provides
// main(), so the two cannot coexist. This looks like the pre-refactor entry
// point left over from a diff view - confirm it is meant to be removed.
int main() {
benchmark_sinf<GPUBackend>();
benchmark_cosf<GPUBackend>();
benchmark_sincosf<GPUBackend>();
}
// GPUBackend benchmark registrations.
//
// Each of the three kernels (sinf, cosf, sincosf) is reported in milliseconds
// and run with an argument of 100000, 1000000 and 10000000; the benchmark
// bodies read that argument through state.range(0) as the element count.
// The sizes are written as integer literals because Arg() takes an integer.
BENCHMARK_TEMPLATE(benchmark_sinf, GPUBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

BENCHMARK_TEMPLATE(benchmark_cosf, GPUBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

BENCHMARK_TEMPLATE(benchmark_sincosf, GPUBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

// Google Benchmark supplies main() for this executable.
BENCHMARK_MAIN();

View File

@@ -2,12 +2,29 @@
#include "benchmark_utils.hpp"
int main() {
benchmark_sinf<LookupBackend<16384>>();
benchmark_cosf<LookupBackend<16384>>();
benchmark_sincosf<LookupBackend<16384>>();
benchmark_sinf<LookupBackend<32768>>();
benchmark_cosf<LookupBackend<32768>>();
benchmark_sincosf<LookupBackend<32768>>();
template <typename Backend> void register_benchmarks() {
BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
}
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
register_benchmarks<LookupBackend<16384>>();
register_benchmarks<LookupBackend<32768>>();
return ::benchmark::RunSpecifiedBenchmarks();
}

View File

@@ -2,12 +2,29 @@
#include "benchmark_utils.hpp"
int main() {
benchmark_sinf<LookupAVXBackend<16384>>();
benchmark_cosf<LookupAVXBackend<16384>>();
benchmark_sincosf<LookupAVXBackend<16384>>();
benchmark_sinf<LookupAVXBackend<32768>>();
benchmark_cosf<LookupAVXBackend<32768>>();
benchmark_sincosf<LookupAVXBackend<32768>>();
template <typename Backend> void register_benchmarks() {
BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
}
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
register_benchmarks<LookupAVXBackend<16384>>();
register_benchmarks<LookupAVXBackend<32768>>();
return ::benchmark::RunSpecifiedBenchmarks();
}

View File

@@ -2,12 +2,29 @@
#include "benchmark_utils.hpp"
int main() {
benchmark_sinf<LookupXSIMDBackend<16384>>();
benchmark_cosf<LookupXSIMDBackend<16384>>();
benchmark_sincosf<LookupXSIMDBackend<16384>>();
benchmark_sinf<LookupXSIMDBackend<32768>>();
benchmark_cosf<LookupXSIMDBackend<32768>>();
benchmark_sincosf<LookupXSIMDBackend<32768>>();
template <typename Backend> void register_benchmarks() {
BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
}
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
register_benchmarks<LookupXSIMDBackend<16384>>();
register_benchmarks<LookupXSIMDBackend<32768>>();
return ::benchmark::RunSpecifiedBenchmarks();
}

View File

@@ -2,8 +2,20 @@
#include "benchmark_utils.hpp"
// Runs the sinf, cosf and sincosf benchmarks for MKLBackend once each, in
// that order, using the zero-argument benchmark_* templates.
// NOTE(review): BENCHMARK_MAIN() further down in this file also provides
// main(), so the two cannot coexist. This looks like the pre-refactor entry
// point left over from a diff view - confirm it is meant to be removed.
int main() {
benchmark_sinf<MKLBackend>();
benchmark_cosf<MKLBackend>();
benchmark_sincosf<MKLBackend>();
}
// MKLBackend benchmark registrations.
//
// Each of the three kernels (sinf, cosf, sincosf) is reported in milliseconds
// and run with an argument of 100000, 1000000 and 10000000; the benchmark
// bodies read that argument through state.range(0) as the element count.
// The sizes are written as integer literals because Arg() takes an integer.
BENCHMARK_TEMPLATE(benchmark_sinf, MKLBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

BENCHMARK_TEMPLATE(benchmark_cosf, MKLBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

BENCHMARK_TEMPLATE(benchmark_sincosf, MKLBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

// Google Benchmark supplies main() for this executable.
BENCHMARK_MAIN();

View File

@@ -2,8 +2,20 @@
#include "benchmark_utils.hpp"
// Runs the sinf, cosf and sincosf benchmarks for ReferenceBackend once each,
// in that order, using the zero-argument benchmark_* templates.
// NOTE(review): BENCHMARK_MAIN() further down in this file also provides
// main(), so the two cannot coexist. This looks like the pre-refactor entry
// point left over from a diff view - confirm it is meant to be removed.
int main() {
benchmark_sinf<ReferenceBackend>();
benchmark_cosf<ReferenceBackend>();
benchmark_sincosf<ReferenceBackend>();
}
// ReferenceBackend benchmark registrations.
//
// Each of the three kernels (sinf, cosf, sincosf) is reported in milliseconds
// and run with an argument of 100000, 1000000 and 10000000; the benchmark
// bodies read that argument through state.range(0) as the element count.
// The sizes are written as integer literals because Arg() takes an integer.
BENCHMARK_TEMPLATE(benchmark_sinf, ReferenceBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

BENCHMARK_TEMPLATE(benchmark_cosf, ReferenceBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

BENCHMARK_TEMPLATE(benchmark_sincosf, ReferenceBackend)
    ->Unit(benchmark::kMillisecond)
    ->Arg(100000)
    ->Arg(1000000)
    ->Arg(10000000);

// Google Benchmark supplies main() for this executable.
BENCHMARK_MAIN();

View File

@@ -1,76 +1,93 @@
#pragma once
#include <chrono>
#include <iomanip>
#include <iostream>
#include <cmath>
#include <string>
#include <vector>
const size_t N = 1e7;
#include <benchmark/benchmark.h>
/// Prints one result line to std::cout in the form
/// "<name> -> time: <sec> s, throughput: <throughput> M elems/sec".
///
/// @param name        label, right-aligned in a field of width 7.
/// @param sec         elapsed time, printed in fixed notation with 3 decimals.
/// @param throughput  printed with the same fixed, 3-decimal formatting.
///
/// The formatting state of std::cout is saved on entry and restored on exit,
/// so the manipulators used here do not affect later output.
inline void report(const std::string &name, double sec, double throughput) {
  // Snapshot the stream's current formatting in a buffer-less std::ios.
  std::ios saved_format(nullptr);
  saved_format.copyfmt(std::cout);

  std::cout << std::setw(7) << name << " -> "
            << "time: " << std::fixed << std::setprecision(3)
            << std::setfill('0') << sec << " s, "
            << "throughput: " << throughput << " M elems/sec\n";

  // Put precision, fill and float flags back to what the caller had.
  std::cout.copyfmt(saved_format);
}
// Default element count. NOTE(review): not referenced anywhere in this header; the benchmarks take N from state.range(0).
constexpr size_t DEFAULT_N = 10'000'000;
// Benchmark of Backend::compute_sinf on N floats.
//
// NOTE(review): this span contains two versions of the function interleaved
// line by line (a zero-argument one that times a single call and prints via
// report(), and a benchmark::State one), so it does not compile as shown -
// it looks like a diff whose +/- markers were lost.
//
// In the benchmark::State version:
//  - N is taken from state.range(0);
//  - the input is i % 360 degrees converted to radians;
//  - the time between `start` and `end` (presumably just backend.init(N)) is
//    published as the "init_ms" counter;
//  - compute_sinf is the call executed inside the `for (auto _ : state)` loop;
//  - the processed-item count is set to iterations * N.
template <typename Backend> inline void benchmark_sinf() {
template <typename Backend>
static void benchmark_sinf(benchmark::State &state) {
const size_t N = static_cast<size_t>(state.range(0));
std::vector<float> x(N), s(N);
for (size_t i = 0; i < N; ++i)
for (size_t i = 0; i < N; ++i) {
x[i] = (i % 360) * 0.0174533f; // degrees to radians
}
Backend backend;
backend.init(N);
auto start = std::chrono::high_resolution_clock::now();
backend.compute_sinf(N, x.data(), s.data());
backend.init(N);
auto end = std::chrono::high_resolution_clock::now();
// Elapsed microseconds divided by 1e3, i.e. milliseconds.
state.counters["init_ms"] =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count() /
1.e3;
double sec = std::chrono::duration<double>(end - start).count();
double throughput = N / sec / 1e6;
for (auto _ : state) {
backend.compute_sinf(N, x.data(), s.data());
// Keep the output buffer observable so the call is not optimised away.
benchmark::DoNotOptimize(s);
}
report("sinf", sec, throughput);
// Total elements processed over all iterations.
state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(N));
}
// Benchmark of Backend::compute_cosf on N floats.
//
// NOTE(review): this span contains two versions of the function interleaved
// line by line (a zero-argument one that times a single call and prints via
// report(), and a benchmark::State one), so it does not compile as shown -
// it looks like a diff whose +/- markers were lost.
//
// In the benchmark::State version:
//  - N is taken from state.range(0);
//  - the input is i % 360 degrees converted to radians;
//  - the time between `start` and `end` (presumably just backend.init(N)) is
//    published as the "init_ms" counter;
//  - compute_cosf is the call executed inside the `for (auto _ : state)` loop;
//  - the processed-item count is set to iterations * N.
template <typename Backend> inline void benchmark_cosf() {
template <typename Backend>
static void benchmark_cosf(benchmark::State &state) {
const size_t N = static_cast<size_t>(state.range(0));
std::vector<float> x(N), c(N);
for (size_t i = 0; i < N; ++i)
x[i] = (i % 360) * 0.0174533f; // degrees to radians
for (size_t i = 0; i < N; ++i) {
x[i] = (i % 360) * 0.0174533f; // degrees to radians
}
Backend backend;
backend.init(N);
auto start = std::chrono::high_resolution_clock::now();
backend.compute_cosf(N, x.data(), c.data());
backend.init(N);
auto end = std::chrono::high_resolution_clock::now();
// Elapsed microseconds divided by 1e3, i.e. milliseconds.
state.counters["init_ms"] =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count() /
1.e3;
double sec = std::chrono::duration<double>(end - start).count();
double throughput = N / sec / 1e6;
for (auto _ : state) {
backend.compute_cosf(N, x.data(), c.data());
// Keep the output buffer observable so the call is not optimised away.
benchmark::DoNotOptimize(c);
}
report("cosf", sec, throughput);
// Total elements processed over all iterations.
state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(N));
}
// Benchmark of Backend::compute_sincosf on N floats (sine and cosine written
// to two separate output buffers).
//
// NOTE(review): this span contains two versions of the function interleaved
// line by line (a zero-argument one that times a single call and prints via
// report(), and a benchmark::State one), so it does not compile as shown -
// it looks like a diff whose +/- markers were lost.
//
// In the benchmark::State version:
//  - N is taken from state.range(0);
//  - the input is i % 360 degrees converted to radians;
//  - the time between `start` and `end` (presumably just backend.init(N)) is
//    published as the "init_ms" counter;
//  - compute_sincosf is the call executed inside the `for (auto _ : state)`
//    loop;
//  - the processed-item count is set to iterations * N.
template <typename Backend> inline void benchmark_sincosf() {
template <typename Backend>
static void benchmark_sincosf(benchmark::State &state) {
const size_t N = static_cast<size_t>(state.range(0));
std::vector<float> x(N), s(N), c(N);
for (size_t i = 0; i < N; ++i)
x[i] = (i % 360) * 0.0174533f; // degrees to radians
for (size_t i = 0; i < N; ++i) {
x[i] = (i % 360) * 0.0174533f; // degrees to radians
}
Backend backend;
backend.init(N);
auto start = std::chrono::high_resolution_clock::now();
backend.compute_sincosf(N, x.data(), s.data(), c.data());
backend.init(N);
auto end = std::chrono::high_resolution_clock::now();
// Elapsed microseconds divided by 1e3, i.e. milliseconds.
state.counters["init_ms"] =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count() /
1.e3;
double sec = std::chrono::duration<double>(end - start).count();
double throughput = N / sec / 1e6;
for (auto _ : state) {
backend.compute_sincosf(N, x.data(), s.data(), c.data());
// Keep both output buffers observable so the call is not optimised away.
benchmark::DoNotOptimize(s);
benchmark::DoNotOptimize(c);
}
report("sincosf", sec, throughput);
// Total elements processed over all iterations.
state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(N));
}