Merge pull request #7 from astron-rd/use-google-benchmark
Refactor benchmarks using Google Benchmark
This commit is contained in:
@@ -1,23 +1,33 @@
|
||||
# Benchmarks are built against Google Benchmark, fetched at configure time.
# include() is idempotent, so this is safe even if a parent list file has
# already loaded the module.
include(FetchContent)

FetchContent_Declare(
  benchmark
  GIT_REPOSITORY https://github.com/google/benchmark.git
  GIT_TAG v1.9.4)
# Do not build Google Benchmark's own test suite as part of this project.
set(BENCHMARK_ENABLE_TESTING
    OFF
    CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(benchmark)

# Each benchmark executable links trigdx and Google Benchmark exactly once.
add_executable(benchmark_reference benchmark_reference.cpp)
target_link_libraries(benchmark_reference PRIVATE trigdx benchmark::benchmark)

add_executable(benchmark_lookup benchmark_lookup.cpp)
target_link_libraries(benchmark_lookup PRIVATE trigdx benchmark::benchmark)

add_executable(benchmark_lookup_avx benchmark_lookup_avx.cpp)
target_link_libraries(benchmark_lookup_avx PRIVATE trigdx benchmark::benchmark)

if(TRIGDX_USE_MKL)
  add_executable(benchmark_mkl benchmark_mkl.cpp)
  target_link_libraries(benchmark_mkl PRIVATE trigdx benchmark::benchmark)
endif()

if(TRIGDX_USE_GPU)
  add_executable(benchmark_gpu benchmark_gpu.cpp)
  target_link_libraries(benchmark_gpu PRIVATE trigdx gpu benchmark::benchmark)
endif()

if(TRIGDX_USE_XSIMD)
  add_executable(benchmark_lookup_xsimd benchmark_lookup_xsimd.cpp)
  target_link_libraries(benchmark_lookup_xsimd PRIVATE trigdx
                                                       benchmark::benchmark)
endif()
|
||||
|
||||
@@ -2,8 +2,20 @@
|
||||
|
||||
#include "benchmark_utils.hpp"
|
||||
|
||||
int main() {
|
||||
benchmark_sinf<GPUBackend>();
|
||||
benchmark_cosf<GPUBackend>();
|
||||
benchmark_sincosf<GPUBackend>();
|
||||
}
|
||||
BENCHMARK_TEMPLATE(benchmark_sinf, GPUBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_cosf, GPUBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_sincosf, GPUBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
@@ -2,12 +2,29 @@
|
||||
|
||||
#include "benchmark_utils.hpp"
|
||||
|
||||
int main() {
|
||||
benchmark_sinf<LookupBackend<16384>>();
|
||||
benchmark_cosf<LookupBackend<16384>>();
|
||||
benchmark_sincosf<LookupBackend<16384>>();
|
||||
|
||||
benchmark_sinf<LookupBackend<32768>>();
|
||||
benchmark_cosf<LookupBackend<32768>>();
|
||||
benchmark_sincosf<LookupBackend<32768>>();
|
||||
template <typename Backend> void register_benchmarks() {
|
||||
BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::benchmark::Initialize(&argc, argv);
|
||||
|
||||
register_benchmarks<LookupBackend<16384>>();
|
||||
register_benchmarks<LookupBackend<32768>>();
|
||||
|
||||
return ::benchmark::RunSpecifiedBenchmarks();
|
||||
}
|
||||
@@ -2,12 +2,29 @@
|
||||
|
||||
#include "benchmark_utils.hpp"
|
||||
|
||||
int main() {
|
||||
benchmark_sinf<LookupAVXBackend<16384>>();
|
||||
benchmark_cosf<LookupAVXBackend<16384>>();
|
||||
benchmark_sincosf<LookupAVXBackend<16384>>();
|
||||
|
||||
benchmark_sinf<LookupAVXBackend<32768>>();
|
||||
benchmark_cosf<LookupAVXBackend<32768>>();
|
||||
benchmark_sincosf<LookupAVXBackend<32768>>();
|
||||
template <typename Backend> void register_benchmarks() {
|
||||
BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::benchmark::Initialize(&argc, argv);
|
||||
|
||||
register_benchmarks<LookupAVXBackend<16384>>();
|
||||
register_benchmarks<LookupAVXBackend<32768>>();
|
||||
|
||||
return ::benchmark::RunSpecifiedBenchmarks();
|
||||
}
|
||||
@@ -2,12 +2,29 @@
|
||||
|
||||
#include "benchmark_utils.hpp"
|
||||
|
||||
int main() {
|
||||
benchmark_sinf<LookupXSIMDBackend<16384>>();
|
||||
benchmark_cosf<LookupXSIMDBackend<16384>>();
|
||||
benchmark_sincosf<LookupXSIMDBackend<16384>>();
|
||||
|
||||
benchmark_sinf<LookupXSIMDBackend<32768>>();
|
||||
benchmark_cosf<LookupXSIMDBackend<32768>>();
|
||||
benchmark_sincosf<LookupXSIMDBackend<32768>>();
|
||||
template <typename Backend> void register_benchmarks() {
|
||||
BENCHMARK_TEMPLATE(benchmark_sinf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_cosf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_sincosf, Backend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::benchmark::Initialize(&argc, argv);
|
||||
|
||||
register_benchmarks<LookupXSIMDBackend<16384>>();
|
||||
register_benchmarks<LookupXSIMDBackend<32768>>();
|
||||
|
||||
return ::benchmark::RunSpecifiedBenchmarks();
|
||||
}
|
||||
@@ -2,8 +2,20 @@
|
||||
|
||||
#include "benchmark_utils.hpp"
|
||||
|
||||
int main() {
|
||||
benchmark_sinf<MKLBackend>();
|
||||
benchmark_cosf<MKLBackend>();
|
||||
benchmark_sincosf<MKLBackend>();
|
||||
}
|
||||
BENCHMARK_TEMPLATE(benchmark_sinf, MKLBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_cosf, MKLBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_sincosf, MKLBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
@@ -2,8 +2,20 @@
|
||||
|
||||
#include "benchmark_utils.hpp"
|
||||
|
||||
int main() {
|
||||
benchmark_sinf<ReferenceBackend>();
|
||||
benchmark_cosf<ReferenceBackend>();
|
||||
benchmark_sincosf<ReferenceBackend>();
|
||||
}
|
||||
BENCHMARK_TEMPLATE(benchmark_sinf, ReferenceBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_cosf, ReferenceBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
BENCHMARK_TEMPLATE(benchmark_sincosf, ReferenceBackend)
|
||||
->Unit(benchmark::kMillisecond)
|
||||
->Arg(1e5)
|
||||
->Arg(1e6)
|
||||
->Arg(1e7);
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
@@ -1,76 +1,96 @@
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
const size_t N = 1e7;
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
/// Prints one result line to std::cout in the form
/// "<name> -> time: <sec> s, throughput: <throughput> M elems/sec".
///
/// @param name       Label, right-aligned in a 7-character field.
/// @param sec        Elapsed time in seconds, printed with 3 decimals.
/// @param throughput Throughput value, printed with 3 decimals.
///
/// The formatting state of std::cout is saved on entry and restored on exit.
inline void report(const std::string &name, double sec, double throughput) {
  std::ostream &out = std::cout;

  // Snapshot the stream's formatting so the manipulators below do not leak.
  std::ios saved_format(nullptr);
  saved_format.copyfmt(out);

  out << std::setw(7) << name << " -> "
      << "time: " << std::fixed << std::setprecision(3) << std::setfill('0')
      << sec << " s, "
      << "throughput: " << throughput << " M elems/sec\n";

  out.copyfmt(saved_format);
}
|
||||
// Default values if not overridden by range multipliers
|
||||
constexpr size_t DEFAULT_N = 10'000'000;
|
||||
|
||||
template <typename Backend> inline void benchmark_sinf() {
|
||||
template <typename Backend>
|
||||
static void benchmark_sinf(benchmark::State &state) {
|
||||
const size_t N = static_cast<size_t>(state.range(0));
|
||||
std::vector<float> x(N), s(N);
|
||||
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x[i] = (i % 360) * 0.0174533f; // degrees to radians
|
||||
|
||||
Backend backend;
|
||||
backend.init(N);
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
backend.compute_sinf(N, x.data(), s.data());
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
double sec = std::chrono::duration<double>(end - start).count();
|
||||
double throughput = N / sec / 1e6;
|
||||
|
||||
report("sinf", sec, throughput);
|
||||
}
|
||||
|
||||
template <typename Backend> inline void benchmark_cosf() {
|
||||
Backend backend;
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
backend.init(N);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
state.counters["init_ms"] =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||
.count() /
|
||||
1.e3;
|
||||
|
||||
for (auto _ : state) {
|
||||
backend.compute_sinf(N, x.data(), s.data());
|
||||
benchmark::DoNotOptimize(s);
|
||||
}
|
||||
|
||||
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
||||
static_cast<int64_t>(N));
|
||||
}
|
||||
|
||||
template <typename Backend>
|
||||
static void benchmark_cosf(benchmark::State &state) {
|
||||
const size_t N = static_cast<size_t>(state.range(0));
|
||||
std::vector<float> x(N), c(N);
|
||||
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
x[i] = (i % 360) * 0.0174533f; // degrees to radians
|
||||
|
||||
Backend backend;
|
||||
backend.init(N);
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
backend.compute_cosf(N, x.data(), c.data());
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
double sec = std::chrono::duration<double>(end - start).count();
|
||||
double throughput = N / sec / 1e6;
|
||||
|
||||
report("cosf", sec, throughput);
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x[i] = (i % 360) * 0.0174533f;
|
||||
}
|
||||
|
||||
template <typename Backend> inline void benchmark_sincosf() {
|
||||
Backend backend;
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
backend.init(N);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
state.counters["init_ms"] =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||
.count() /
|
||||
1.e3;
|
||||
|
||||
for (auto _ : state) {
|
||||
backend.compute_cosf(N, x.data(), c.data());
|
||||
benchmark::DoNotOptimize(c);
|
||||
}
|
||||
|
||||
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
||||
static_cast<int64_t>(N));
|
||||
}
|
||||
|
||||
template <typename Backend>
|
||||
static void benchmark_sincosf(benchmark::State &state) {
|
||||
const size_t N = static_cast<size_t>(state.range(0));
|
||||
std::vector<float> x(N), s(N), c(N);
|
||||
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
x[i] = (i % 360) * 0.0174533f; // degrees to radians
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
x[i] = (i % 360) * 0.0174533f;
|
||||
}
|
||||
|
||||
Backend backend;
|
||||
backend.init(N);
|
||||
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
backend.compute_sincosf(N, x.data(), s.data(), c.data());
|
||||
backend.init(N);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
state.counters["init_ms"] =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||
.count() /
|
||||
1.e3;
|
||||
|
||||
double sec = std::chrono::duration<double>(end - start).count();
|
||||
double throughput = N / sec / 1e6;
|
||||
|
||||
report("sincosf", sec, throughput);
|
||||
for (auto _ : state) {
|
||||
backend.compute_sincosf(N, x.data(), s.data(), c.data());
|
||||
benchmark::DoNotOptimize(s);
|
||||
benchmark::DoNotOptimize(c);
|
||||
}
|
||||
|
||||
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
||||
static_cast<int64_t>(N));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user