From cd048e5581ad58e8168ee293da3f7df94bd1016b Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Tue, 12 Aug 2025 16:44:17 +0200 Subject: [PATCH 1/6] Remove unused DEFAULT_N --- benchmarks/benchmark_utils.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/benchmarks/benchmark_utils.hpp b/benchmarks/benchmark_utils.hpp index 617ad6c..137ae24 100644 --- a/benchmarks/benchmark_utils.hpp +++ b/benchmarks/benchmark_utils.hpp @@ -7,9 +7,6 @@ #include -// Default values if not overridden by range multipliers -constexpr size_t DEFAULT_N = 10'000'000; - template static void benchmark_sinf(benchmark::State &state) { const size_t N = static_cast(state.range(0)); From f6575599fd44534ca6d61f864a422987cf611235 Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Tue, 12 Aug 2025 16:46:22 +0200 Subject: [PATCH 2/6] Cleanup data initialization of benchmarks --- benchmarks/benchmark_utils.hpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/benchmarks/benchmark_utils.hpp b/benchmarks/benchmark_utils.hpp index 137ae24..228b88e 100644 --- a/benchmarks/benchmark_utils.hpp +++ b/benchmarks/benchmark_utils.hpp @@ -7,14 +7,17 @@ #include +void init_x(std::vector &x) { + for (size_t i = 0; i < x.size(); ++i) { + x[i] = (i % 360) * 0.0174533f; // degrees to radians + } +} + template static void benchmark_sinf(benchmark::State &state) { const size_t N = static_cast(state.range(0)); std::vector x(N), s(N); - - for (size_t i = 0; i < N; ++i) { - x[i] = (i % 360) * 0.0174533f; // degrees to radians - } + init_x(x); Backend backend; @@ -39,10 +42,7 @@ template static void benchmark_cosf(benchmark::State &state) { const size_t N = static_cast(state.range(0)); std::vector x(N), c(N); - - for (size_t i = 0; i < N; ++i) { - x[i] = (i % 360) * 0.0174533f; - } + init_x(x); Backend backend; @@ -67,10 +67,7 @@ template static void benchmark_sincosf(benchmark::State &state) { const size_t N = static_cast(state.range(0)); std::vector x(N), s(N), c(N); - - for (size_t i = 0; i < N; ++i) { - x[i] = (i % 360) * 0.0174533f; - } + init_x(x); Backend backend; From 5338f3e1350ec61eb579d62ad6a1ef2565c3bbed Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Tue, 12 Aug 2025 16:47:40 +0200 Subject: [PATCH 3/6] Cleanup data initialization of tests --- tests/test_utils.hpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/test_utils.hpp b/tests/test_utils.hpp index e366bef..4447d65 100644 --- a/tests/test_utils.hpp +++ b/tests/test_utils.hpp @@ -9,12 +9,15 @@ const size_t N = 1e7; +void init_x(std::vector &x) { + for (size_t i = 0; i < x.size(); ++i) { + x[i] = (i % 360) * 0.0174533f; // degrees to radians + } +} + template inline void test_sinf(float tol) { std::vector x(N), s_ref(N), s(N); - - for (size_t i = 0; i < N; ++i) { - x[i] = float(i) * 0.01f; - } + init_x(x); ReferenceBackend ref; Backend backend; @@ -30,10 +33,7 @@ template inline void test_sinf(float tol) { template inline void test_cosf(float tol) { std::vector x(N), c_ref(N), c(N); - - for (size_t i = 0; i < N; ++i) { - x[i] = float(i) * 0.01f; - } + init_x(x); ReferenceBackend ref; Backend backend; @@ -49,10 +49,7 @@ template inline void test_cosf(float tol) { template inline void test_sincosf(float tol) { std::vector x(N), s_ref(N), c_ref(N), s(N), c(N); - - for (size_t i = 0; i < N; ++i) { - x[i] = float(i) * 0.01f; - } + init_x(x); ReferenceBackend ref; Backend backend; From ebb6d50c0bccdd47af8e49d130917781b9f61ab7 Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Tue, 12 Aug 2025 16:48:11 +0200 Subject: [PATCH 4/6] Bugfix gpu input data copy --- src/gpu.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gpu.cpp b/src/gpu.cpp index 38ef92b..b1a4175 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -45,21 +45,23 @@ struct GPUBackend::Impl { const size_t bytes = n * sizeof(float); std::memcpy(h_x, x, bytes); cudaMemcpy(d_x, h_x, bytes, cudaMemcpyHostToDevice); - launch_sincosf_kernel(d_x, d_s, d_c, n); + launch_sinf_kernel(d_x, d_s, n); cudaMemcpy(h_s, d_s, bytes, cudaMemcpyDeviceToHost); std::memcpy(s, h_s, bytes); } void compute_cosf(size_t n, const float *x, float *c) const { const size_t bytes = n * sizeof(float); + std::memcpy(h_x, x, bytes); cudaMemcpy(d_x, h_x, bytes, cudaMemcpyHostToDevice); - launch_sincosf_kernel(d_x, d_s, d_c, n); + launch_cosf_kernel(d_x, d_c, n); cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost); std::memcpy(c, h_c, bytes); } void compute_sincosf(size_t n, const float *x, float *s, float *c) const { const size_t bytes = n * sizeof(float); + std::memcpy(h_x, x, bytes); cudaMemcpy(d_x, h_x, bytes, cudaMemcpyHostToDevice); launch_sincosf_kernel(d_x, d_s, d_c, n); cudaMemcpy(h_s, d_s, bytes, cudaMemcpyDeviceToHost); From c7ab463b43a2dc82a6c36c2891ce90143fc24d0b Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Tue, 12 Aug 2025 17:02:51 +0200 Subject: [PATCH 5/6] Bugfix gpu output data copy --- src/gpu.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gpu.cpp b/src/gpu.cpp index b1a4175..b6be96f 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -66,6 +66,8 @@ struct GPUBackend::Impl { launch_sincosf_kernel(d_x, d_s, d_c, n); cudaMemcpy(h_s, d_s, bytes, cudaMemcpyDeviceToHost); cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost); + std::memcpy(s, h_s, bytes); + std::memcpy(c, h_c, bytes); } float *h_x = nullptr; From 33f98abc48b3106cdbe00a734d39f91ffe1ebe11 Mon Sep 17 00:00:00 2001 From: Bram Veenboer Date: Tue, 12 Aug 2025 17:10:55 +0200 Subject: [PATCH 6/6] Set tolerance for CUDA tests to 1e-6 --- tests/test_gpu.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_gpu.cpp b/tests/test_gpu.cpp index 6e06419..2fbd28f 100644 --- a/tests/test_gpu.cpp +++ b/tests/test_gpu.cpp @@ -3,8 +3,8 @@ #include "test_utils.hpp" -TEST_CASE("sinf") { test_sinf(1e-1f); } +TEST_CASE("sinf") { test_sinf(1e-6f); } -TEST_CASE("cosf") { test_cosf(1e-1f); } +TEST_CASE("cosf") { test_cosf(1e-6f); } -TEST_CASE("sincosf") { test_sincosf(1e-1f); } +TEST_CASE("sincosf") { test_sincosf(1e-6f); }