Merge pull request #7 from astron-rd/use-google-benchmark

Refactor benchmarks using Google Benchmark
This commit is contained in:
Bram Veenboer
2025-08-12 15:24:58 +02:00
committed by GitHub
8 changed files with 198 additions and 81 deletions

View File

@@ -1,23 +1,33 @@
FetchContent_Declare(
benchmark
GIT_REPOSITORY https://github.com/google/benchmark.git
GIT_TAG v1.9.4)
set(BENCHMARK_ENABLE_TESTING
OFF
CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(benchmark)
add_executable(benchmark_reference benchmark_reference.cpp) add_executable(benchmark_reference benchmark_reference.cpp)
target_link_libraries(benchmark_reference PRIVATE trigdx) target_link_libraries(benchmark_reference PRIVATE trigdx benchmark::benchmark)
add_executable(benchmark_lookup benchmark_lookup.cpp) add_executable(benchmark_lookup benchmark_lookup.cpp)
target_link_libraries(benchmark_lookup PRIVATE trigdx) target_link_libraries(benchmark_lookup PRIVATE trigdx benchmark::benchmark)
add_executable(benchmark_lookup_avx benchmark_lookup_avx.cpp) add_executable(benchmark_lookup_avx benchmark_lookup_avx.cpp)
target_link_libraries(benchmark_lookup_avx PRIVATE trigdx) target_link_libraries(benchmark_lookup_avx PRIVATE trigdx benchmark::benchmark)
if(TRIGDX_USE_MKL) if(TRIGDX_USE_MKL)
add_executable(benchmark_mkl benchmark_mkl.cpp) add_executable(benchmark_mkl benchmark_mkl.cpp)
target_link_libraries(benchmark_mkl PRIVATE trigdx) target_link_libraries(benchmark_mkl PRIVATE trigdx benchmark::benchmark)
endif() endif()
if(TRIGDX_USE_GPU) if(TRIGDX_USE_GPU)
add_executable(benchmark_gpu benchmark_gpu.cpp) add_executable(benchmark_gpu benchmark_gpu.cpp)
target_link_libraries(benchmark_gpu PRIVATE trigdx gpu) target_link_libraries(benchmark_gpu PRIVATE trigdx gpu benchmark::benchmark)
endif() endif()
if(TRIGDX_USE_XSIMD) if(TRIGDX_USE_XSIMD)
add_executable(benchmark_lookup_xsimd benchmark_lookup_xsimd.cpp) add_executable(benchmark_lookup_xsimd benchmark_lookup_xsimd.cpp)
target_link_libraries(benchmark_lookup_xsimd PRIVATE trigdx) target_link_libraries(benchmark_lookup_xsimd PRIVATE trigdx
benchmark::benchmark)
endif() endif()

View File

@@ -2,8 +2,20 @@
#include "benchmark_utils.hpp" #include "benchmark_utils.hpp"
int main() { BENCHMARK_TEMPLATE(benchmark_sinf, GPUBackend)
benchmark_sinf<GPUBackend>(); ->Unit(benchmark::kMillisecond)
benchmark_cosf<GPUBackend>(); ->Arg(1e5)
benchmark_sincosf<GPUBackend>(); ->Arg(1e6)
} ->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_cosf, GPUBackend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, GPUBackend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_MAIN();

View File

@@ -2,12 +2,29 @@
#include "benchmark_utils.hpp" #include "benchmark_utils.hpp"
int main() { template <typename Backend> void register_benchmarks() {
benchmark_sinf<LookupBackend<16384>>(); BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
benchmark_cosf<LookupBackend<16384>>(); ->Unit(benchmark::kMillisecond)
benchmark_sincosf<LookupBackend<16384>>(); ->Arg(1e5)
->Arg(1e6)
benchmark_sinf<LookupBackend<32768>>(); ->Arg(1e7);
benchmark_cosf<LookupBackend<32768>>(); BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
benchmark_sincosf<LookupBackend<32768>>(); ->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
} }
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
register_benchmarks<LookupBackend<16384>>();
register_benchmarks<LookupBackend<32768>>();
return ::benchmark::RunSpecifiedBenchmarks();
}

View File

@@ -2,12 +2,29 @@
#include "benchmark_utils.hpp" #include "benchmark_utils.hpp"
int main() { template <typename Backend> void register_benchmarks() {
benchmark_sinf<LookupAVXBackend<16384>>(); BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
benchmark_cosf<LookupAVXBackend<16384>>(); ->Unit(benchmark::kMillisecond)
benchmark_sincosf<LookupAVXBackend<16384>>(); ->Arg(1e5)
->Arg(1e6)
benchmark_sinf<LookupAVXBackend<32768>>(); ->Arg(1e7);
benchmark_cosf<LookupAVXBackend<32768>>(); BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
benchmark_sincosf<LookupAVXBackend<32768>>(); ->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
} }
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
register_benchmarks<LookupAVXBackend<16384>>();
register_benchmarks<LookupAVXBackend<32768>>();
return ::benchmark::RunSpecifiedBenchmarks();
}

View File

@@ -2,12 +2,29 @@
#include "benchmark_utils.hpp" #include "benchmark_utils.hpp"
int main() { template <typename Backend> void register_benchmarks() {
benchmark_sinf<LookupXSIMDBackend<16384>>(); BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
benchmark_cosf<LookupXSIMDBackend<16384>>(); ->Unit(benchmark::kMillisecond)
benchmark_sincosf<LookupXSIMDBackend<16384>>(); ->Arg(1e5)
->Arg(1e6)
benchmark_sinf<LookupXSIMDBackend<32768>>(); ->Arg(1e7);
benchmark_cosf<LookupXSIMDBackend<32768>>(); BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
benchmark_sincosf<LookupXSIMDBackend<32768>>(); ->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
} }
int main(int argc, char **argv) {
::benchmark::Initialize(&argc, argv);
register_benchmarks<LookupXSIMDBackend<16384>>();
register_benchmarks<LookupXSIMDBackend<32768>>();
return ::benchmark::RunSpecifiedBenchmarks();
}

View File

@@ -2,8 +2,20 @@
#include "benchmark_utils.hpp" #include "benchmark_utils.hpp"
int main() { BENCHMARK_TEMPLATE(benchmark_sinf, MKLBackend)
benchmark_sinf<MKLBackend>(); ->Unit(benchmark::kMillisecond)
benchmark_cosf<MKLBackend>(); ->Arg(1e5)
benchmark_sincosf<MKLBackend>(); ->Arg(1e6)
} ->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_cosf, MKLBackend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, MKLBackend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_MAIN();

View File

@@ -2,8 +2,20 @@
#include "benchmark_utils.hpp" #include "benchmark_utils.hpp"
int main() { BENCHMARK_TEMPLATE(benchmark_sinf, ReferenceBackend)
benchmark_sinf<ReferenceBackend>(); ->Unit(benchmark::kMillisecond)
benchmark_cosf<ReferenceBackend>(); ->Arg(1e5)
benchmark_sincosf<ReferenceBackend>(); ->Arg(1e6)
} ->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_cosf, ReferenceBackend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_TEMPLATE(benchmark_sincosf, ReferenceBackend)
->Unit(benchmark::kMillisecond)
->Arg(1e5)
->Arg(1e6)
->Arg(1e7);
BENCHMARK_MAIN();

View File

@@ -1,76 +1,96 @@
#pragma once #pragma once
#include <chrono> #include <chrono>
#include <iomanip> #include <cmath>
#include <iostream> #include <string>
#include <vector> #include <vector>
const size_t N = 1e7; #include <benchmark/benchmark.h>
inline void report(const std::string &name, double sec, double throughput) { // Default values if not overridden by range multipliers
std::ios state(nullptr); constexpr size_t DEFAULT_N = 10'000'000;
state.copyfmt(std::cout);
std::cout << std::setw(7) << name << " -> ";
std::cout << "time: ";
std::cout << std::fixed << std::setprecision(3) << std::setfill('0');
std::cout << sec << " s, ";
std::cout << "throughput: " << throughput << " M elems/sec\n";
std::cout.copyfmt(state);
}
template <typename Backend> inline void benchmark_sinf() { template <typename Backend>
static void benchmark_sinf(benchmark::State &state) {
const size_t N = static_cast<size_t>(state.range(0));
std::vector<float> x(N), s(N); std::vector<float> x(N), s(N);
for (size_t i = 0; i < N; ++i) for (size_t i = 0; i < N; ++i) {
x[i] = (i % 360) * 0.0174533f; // degrees to radians x[i] = (i % 360) * 0.0174533f; // degrees to radians
}
Backend backend; Backend backend;
backend.init(N);
auto start = std::chrono::high_resolution_clock::now(); auto start = std::chrono::high_resolution_clock::now();
backend.compute_sinf(N, x.data(), s.data()); backend.init(N);
auto end = std::chrono::high_resolution_clock::now(); auto end = std::chrono::high_resolution_clock::now();
state.counters["init_ms"] =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count() /
1.e3;
double sec = std::chrono::duration<double>(end - start).count(); for (auto _ : state) {
double throughput = N / sec / 1e6; backend.compute_sinf(N, x.data(), s.data());
benchmark::DoNotOptimize(s);
}
report("sinf", sec, throughput); state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(N));
} }
template <typename Backend> inline void benchmark_cosf() { template <typename Backend>
static void benchmark_cosf(benchmark::State &state) {
const size_t N = static_cast<size_t>(state.range(0));
std::vector<float> x(N), c(N); std::vector<float> x(N), c(N);
for (size_t i = 0; i < N; ++i) for (size_t i = 0; i < N; ++i) {
x[i] = (i % 360) * 0.0174533f; // degrees to radians x[i] = (i % 360) * 0.0174533f;
}
Backend backend; Backend backend;
backend.init(N);
auto start = std::chrono::high_resolution_clock::now(); auto start = std::chrono::high_resolution_clock::now();
backend.compute_cosf(N, x.data(), c.data()); backend.init(N);
auto end = std::chrono::high_resolution_clock::now(); auto end = std::chrono::high_resolution_clock::now();
state.counters["init_ms"] =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count() /
1.e3;
double sec = std::chrono::duration<double>(end - start).count(); for (auto _ : state) {
double throughput = N / sec / 1e6; backend.compute_cosf(N, x.data(), c.data());
benchmark::DoNotOptimize(c);
}
report("cosf", sec, throughput); state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(N));
} }
template <typename Backend> inline void benchmark_sincosf() { template <typename Backend>
static void benchmark_sincosf(benchmark::State &state) {
const size_t N = static_cast<size_t>(state.range(0));
std::vector<float> x(N), s(N), c(N); std::vector<float> x(N), s(N), c(N);
for (size_t i = 0; i < N; ++i) for (size_t i = 0; i < N; ++i) {
x[i] = (i % 360) * 0.0174533f; // degrees to radians x[i] = (i % 360) * 0.0174533f;
}
Backend backend; Backend backend;
backend.init(N);
auto start = std::chrono::high_resolution_clock::now(); auto start = std::chrono::high_resolution_clock::now();
backend.compute_sincosf(N, x.data(), s.data(), c.data()); backend.init(N);
auto end = std::chrono::high_resolution_clock::now(); auto end = std::chrono::high_resolution_clock::now();
state.counters["init_ms"] =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count() /
1.e3;
double sec = std::chrono::duration<double>(end - start).count(); for (auto _ : state) {
double throughput = N / sec / 1e6; backend.compute_sincosf(N, x.data(), s.data(), c.data());
benchmark::DoNotOptimize(s);
benchmark::DoNotOptimize(c);
}
report("sincosf", sec, throughput); state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
static_cast<int64_t>(N));
} }