diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 3d2416e..a44e7fc 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,23 +1,33 @@ +FetchContent_Declare( + benchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.9.4) +set(BENCHMARK_ENABLE_TESTING + OFF + CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(benchmark) + add_executable(benchmark_reference benchmark_reference.cpp) -target_link_libraries(benchmark_reference PRIVATE trigdx) +target_link_libraries(benchmark_reference PRIVATE trigdx benchmark::benchmark) add_executable(benchmark_lookup benchmark_lookup.cpp) -target_link_libraries(benchmark_lookup PRIVATE trigdx) +target_link_libraries(benchmark_lookup PRIVATE trigdx benchmark::benchmark) add_executable(benchmark_lookup_avx benchmark_lookup_avx.cpp) -target_link_libraries(benchmark_lookup_avx PRIVATE trigdx) +target_link_libraries(benchmark_lookup_avx PRIVATE trigdx benchmark::benchmark) if(TRIGDX_USE_MKL) add_executable(benchmark_mkl benchmark_mkl.cpp) - target_link_libraries(benchmark_mkl PRIVATE trigdx) + target_link_libraries(benchmark_mkl PRIVATE trigdx benchmark::benchmark) endif() if(TRIGDX_USE_GPU) add_executable(benchmark_gpu benchmark_gpu.cpp) - target_link_libraries(benchmark_gpu PRIVATE trigdx gpu) + target_link_libraries(benchmark_gpu PRIVATE trigdx gpu benchmark::benchmark) endif() if(TRIGDX_USE_XSIMD) add_executable(benchmark_lookup_xsimd benchmark_lookup_xsimd.cpp) - target_link_libraries(benchmark_lookup_xsimd PRIVATE trigdx) + target_link_libraries(benchmark_lookup_xsimd PRIVATE trigdx + benchmark::benchmark) endif() diff --git a/benchmarks/benchmark_gpu.cpp b/benchmarks/benchmark_gpu.cpp index 2edbcb2..147bcff 100644 --- a/benchmarks/benchmark_gpu.cpp +++ b/benchmarks/benchmark_gpu.cpp @@ -2,8 +2,20 @@ #include "benchmark_utils.hpp" -int main() { - benchmark_sinf(); - benchmark_cosf(); - benchmark_sincosf(); -} +BENCHMARK_TEMPLATE(benchmark_sinf, GPUBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); +BENCHMARK_TEMPLATE(benchmark_cosf, GPUBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); +BENCHMARK_TEMPLATE(benchmark_sincosf, GPUBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/benchmarks/benchmark_lookup.cpp b/benchmarks/benchmark_lookup.cpp index 20b2f1d..2376552 100644 --- a/benchmarks/benchmark_lookup.cpp +++ b/benchmarks/benchmark_lookup.cpp @@ -2,12 +2,29 @@ #include "benchmark_utils.hpp" -int main() { - benchmark_sinf>(); - benchmark_cosf>(); - benchmark_sincosf>(); - - benchmark_sinf>(); - benchmark_cosf>(); - benchmark_sincosf>(); +template void register_benchmarks() { + BENCHMARK_TEMPLATE(benchmark_sinf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + BENCHMARK_TEMPLATE(benchmark_cosf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + BENCHMARK_TEMPLATE(benchmark_sincosf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); } + +int main(int argc, char **argv) { + ::benchmark::Initialize(&argc, argv); + + register_benchmarks>(); + register_benchmarks>(); + + return ::benchmark::RunSpecifiedBenchmarks(); +} \ No newline at end of file diff --git a/benchmarks/benchmark_lookup_avx.cpp b/benchmarks/benchmark_lookup_avx.cpp index 92403a8..7dc5bf7 100644 --- a/benchmarks/benchmark_lookup_avx.cpp +++ b/benchmarks/benchmark_lookup_avx.cpp @@ -2,12 +2,29 @@ #include "benchmark_utils.hpp" -int main() { - benchmark_sinf>(); - benchmark_cosf>(); - benchmark_sincosf>(); - - benchmark_sinf>(); - benchmark_cosf>(); - benchmark_sincosf>(); +template void register_benchmarks() { + BENCHMARK_TEMPLATE(benchmark_sinf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + BENCHMARK_TEMPLATE(benchmark_cosf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + BENCHMARK_TEMPLATE(benchmark_sincosf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); } + +int main(int argc, char **argv) { + ::benchmark::Initialize(&argc, argv); + + register_benchmarks>(); + register_benchmarks>(); + + return ::benchmark::RunSpecifiedBenchmarks(); +} \ No newline at end of file diff --git a/benchmarks/benchmark_lookup_xsimd.cpp b/benchmarks/benchmark_lookup_xsimd.cpp index 7abb939..51cc78c 100644 --- a/benchmarks/benchmark_lookup_xsimd.cpp +++ b/benchmarks/benchmark_lookup_xsimd.cpp @@ -2,12 +2,29 @@ #include "benchmark_utils.hpp" -int main() { - benchmark_sinf>(); - benchmark_cosf>(); - benchmark_sincosf>(); - - benchmark_sinf>(); - benchmark_cosf>(); - benchmark_sincosf>(); +template void register_benchmarks() { + BENCHMARK_TEMPLATE(benchmark_sinf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + BENCHMARK_TEMPLATE(benchmark_cosf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + BENCHMARK_TEMPLATE(benchmark_sincosf, Backend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); } + +int main(int argc, char **argv) { + ::benchmark::Initialize(&argc, argv); + + register_benchmarks>(); + register_benchmarks>(); + + return ::benchmark::RunSpecifiedBenchmarks(); +} \ No newline at end of file diff --git a/benchmarks/benchmark_mkl.cpp b/benchmarks/benchmark_mkl.cpp index d7585f4..b027ee6 100644 --- a/benchmarks/benchmark_mkl.cpp +++ b/benchmarks/benchmark_mkl.cpp @@ -2,8 +2,20 @@ #include "benchmark_utils.hpp" -int main() { - benchmark_sinf(); - benchmark_cosf(); - benchmark_sincosf(); -} +BENCHMARK_TEMPLATE(benchmark_sinf, MKLBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); +BENCHMARK_TEMPLATE(benchmark_cosf, MKLBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); +BENCHMARK_TEMPLATE(benchmark_sincosf, MKLBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/benchmarks/benchmark_reference.cpp b/benchmarks/benchmark_reference.cpp index 9b2c6c9..a897a72 100644 --- a/benchmarks/benchmark_reference.cpp +++ b/benchmarks/benchmark_reference.cpp @@ -2,8 +2,20 @@ #include "benchmark_utils.hpp" -int main() { - benchmark_sinf(); - benchmark_cosf(); - benchmark_sincosf(); -} +BENCHMARK_TEMPLATE(benchmark_sinf, ReferenceBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); +BENCHMARK_TEMPLATE(benchmark_cosf, ReferenceBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); +BENCHMARK_TEMPLATE(benchmark_sincosf, ReferenceBackend) + ->Unit(benchmark::kMillisecond) + ->Arg(1e5) + ->Arg(1e6) + ->Arg(1e7); + +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/benchmarks/benchmark_utils.hpp b/benchmarks/benchmark_utils.hpp index e354c3f..617ad6c 100644 --- a/benchmarks/benchmark_utils.hpp +++ b/benchmarks/benchmark_utils.hpp @@ -1,76 +1,96 @@ #pragma once #include -#include -#include +#include +#include #include -const size_t N = 1e7; +#include -inline void report(const std::string &name, double sec, double throughput) { - std::ios state(nullptr); - state.copyfmt(std::cout); - std::cout << std::setw(7) << name << " -> "; - std::cout << "time: "; - std::cout << std::fixed << std::setprecision(3) << std::setfill('0'); - std::cout << sec << " s, "; - std::cout << "throughput: " << throughput << " M elems/sec\n"; - std::cout.copyfmt(state); -} +// Default values if not overridden by range multipliers +constexpr size_t DEFAULT_N = 10'000'000; -template inline void benchmark_sinf() { +template +static void benchmark_sinf(benchmark::State &state) { + const size_t N = static_cast(state.range(0)); std::vector x(N), s(N); - for (size_t i = 0; i < N; ++i) + for (size_t i = 0; i < N; ++i) { x[i] = (i % 360) * 0.0174533f; // degrees to radians + } Backend backend; - backend.init(N); auto start = std::chrono::high_resolution_clock::now(); - backend.compute_sinf(N, x.data(), s.data()); + backend.init(N); auto end = std::chrono::high_resolution_clock::now(); + state.counters["init_ms"] = + std::chrono::duration_cast(end - start) + .count() / + 1.e3; - double sec = std::chrono::duration(end - start).count(); - double throughput = N / sec / 1e6; + for (auto _ : state) { + backend.compute_sinf(N, x.data(), s.data()); + benchmark::DoNotOptimize(s); + } - report("sinf", sec, throughput); + state.SetItemsProcessed(static_cast(state.iterations()) * + static_cast(N)); } -template inline void benchmark_cosf() { +template +static void benchmark_cosf(benchmark::State &state) { + const size_t N = static_cast(state.range(0)); std::vector x(N), c(N); - for (size_t i = 0; i < N; ++i) - x[i] = (i % 360) * 0.0174533f; // degrees to radians + for (size_t i = 0; i < N; ++i) { + x[i] = (i % 360) * 0.0174533f; + } Backend backend; - backend.init(N); auto start = std::chrono::high_resolution_clock::now(); - backend.compute_cosf(N, x.data(), c.data()); + backend.init(N); auto end = std::chrono::high_resolution_clock::now(); + state.counters["init_ms"] = + std::chrono::duration_cast(end - start) + .count() / + 1.e3; - double sec = std::chrono::duration(end - start).count(); - double throughput = N / sec / 1e6; + for (auto _ : state) { + backend.compute_cosf(N, x.data(), c.data()); + benchmark::DoNotOptimize(c); + } - report("cosf", sec, throughput); + state.SetItemsProcessed(static_cast(state.iterations()) * + static_cast(N)); } -template inline void benchmark_sincosf() { +template +static void benchmark_sincosf(benchmark::State &state) { + const size_t N = static_cast(state.range(0)); std::vector x(N), s(N), c(N); - for (size_t i = 0; i < N; ++i) - x[i] = (i % 360) * 0.0174533f; // degrees to radians + for (size_t i = 0; i < N; ++i) { + x[i] = (i % 360) * 0.0174533f; + } Backend backend; - backend.init(N); auto start = std::chrono::high_resolution_clock::now(); - backend.compute_sincosf(N, x.data(), s.data(), c.data()); + backend.init(N); auto end = std::chrono::high_resolution_clock::now(); + state.counters["init_ms"] = + std::chrono::duration_cast(end - start) + .count() / + 1.e3; - double sec = std::chrono::duration(end - start).count(); - double throughput = N / sec / 1e6; + for (auto _ : state) { + backend.compute_sincosf(N, x.data(), s.data(), c.data()); + benchmark::DoNotOptimize(s); + benchmark::DoNotOptimize(c); + } - report("sincosf", sec, throughput); + state.SetItemsProcessed(static_cast(state.iterations()) * + static_cast(N)); }