Share the input generator between benchmarks and tests; fix GPU staging copies.

Summary of what this patch changes (everything below is taken from the hunks):

  * benchmarks/benchmark_utils.hpp
      Removes the DEFAULT_N constant and its comment. Adds init_x(), which
      fills x[i] with (i % 360) * 0.0174533f, and calls it from
      benchmark_sinf, benchmark_cosf and benchmark_sincosf in place of the
      three copy-pasted fill loops.

  * src/gpu.cpp (GPUBackend::Impl)
      compute_sinf    now launches launch_sinf_kernel instead of
                      launch_sincosf_kernel.
      compute_cosf    now copies x into h_x before the host-to-device
                      cudaMemcpy, and launches launch_cosf_kernel.
      compute_sincosf now copies x into h_x before the upload and copies
                      h_s / h_c into the caller's s / c after the download.

  * tests/test_gpu.cpp
      Tightens the tolerance passed to the three test helpers from 1e-1f
      to 1e-6f.

  * tests/test_utils.hpp
      Adds the same init_x() helper and uses it in test_sinf, test_cosf and
      test_sincosf; the test inputs change from float(i) * 0.01f to
      (i % 360) * 0.0174533f.

Review notes:

  * init_x is declared `inline` in both headers. It is a non-template
    function defined in a header, so without `inline` every additional
    translation unit that includes the header produces a duplicate
    definition at link time. The neighbouring helpers are already
    `template ... static` / `template ... inline`.
  * NOTE(review): the copy of this patch under review had lost its line
    breaks and the contents of its angle brackets. Template argument lists
    (<float>, <typename Backend>, <size_t>, <GPUBackend>) and blank context
    lines were restored from the surrounding code and the hunk line counts;
    confirm them against the tree. The header named by the `#include`
    context line in the first hunk could not be recovered and must be
    filled in before the patch will apply.
  * NOTE(review): the post-image blob ids on the `index` lines were not
    recomputed after adding `inline`.

diff --git a/benchmarks/benchmark_utils.hpp b/benchmarks/benchmark_utils.hpp
index 617ad6c..228b88e 100644
--- a/benchmarks/benchmark_utils.hpp
+++ b/benchmarks/benchmark_utils.hpp
@@ -7,17 +7,17 @@
 
 #include
 
-// Default values if not overridden by range multipliers
-constexpr size_t DEFAULT_N = 10'000'000;
+inline void init_x(std::vector<float> &x) {
+  for (size_t i = 0; i < x.size(); ++i) {
+    x[i] = (i % 360) * 0.0174533f; // degrees to radians
+  }
+}
 
 template <typename Backend>
 static void benchmark_sinf(benchmark::State &state) {
   const size_t N = static_cast<size_t>(state.range(0));
   std::vector<float> x(N), s(N);
-
-  for (size_t i = 0; i < N; ++i) {
-    x[i] = (i % 360) * 0.0174533f; // degrees to radians
-  }
+  init_x(x);
 
   Backend backend;
 
@@ -42,10 +42,7 @@ template <typename Backend>
 static void benchmark_cosf(benchmark::State &state) {
   const size_t N = static_cast<size_t>(state.range(0));
   std::vector<float> x(N), c(N);
-
-  for (size_t i = 0; i < N; ++i) {
-    x[i] = (i % 360) * 0.0174533f;
-  }
+  init_x(x);
 
   Backend backend;
 
@@ -70,10 +67,7 @@ template <typename Backend>
 static void benchmark_sincosf(benchmark::State &state) {
   const size_t N = static_cast<size_t>(state.range(0));
   std::vector<float> x(N), s(N), c(N);
-
-  for (size_t i = 0; i < N; ++i) {
-    x[i] = (i % 360) * 0.0174533f;
-  }
+  init_x(x);
 
   Backend backend;
 
diff --git a/src/gpu.cpp b/src/gpu.cpp
index 38ef92b..b6be96f 100644
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@@ -45,25 +45,29 @@ struct GPUBackend::Impl {
     const size_t bytes = n * sizeof(float);
     std::memcpy(h_x, x, bytes);
     cudaMemcpy(d_x, h_x, bytes, cudaMemcpyHostToDevice);
-    launch_sincosf_kernel(d_x, d_s, d_c, n);
+    launch_sinf_kernel(d_x, d_s, n);
     cudaMemcpy(h_s, d_s, bytes, cudaMemcpyDeviceToHost);
     std::memcpy(s, h_s, bytes);
   }
 
   void compute_cosf(size_t n, const float *x, float *c) const {
     const size_t bytes = n * sizeof(float);
+    std::memcpy(h_x, x, bytes);
     cudaMemcpy(d_x, h_x, bytes, cudaMemcpyHostToDevice);
-    launch_sincosf_kernel(d_x, d_s, d_c, n);
+    launch_cosf_kernel(d_x, d_c, n);
     cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost);
     std::memcpy(c, h_c, bytes);
   }
 
   void compute_sincosf(size_t n, const float *x, float *s, float *c) const {
     const size_t bytes = n * sizeof(float);
+    std::memcpy(h_x, x, bytes);
     cudaMemcpy(d_x, h_x, bytes, cudaMemcpyHostToDevice);
     launch_sincosf_kernel(d_x, d_s, d_c, n);
     cudaMemcpy(h_s, d_s, bytes, cudaMemcpyDeviceToHost);
     cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost);
+    std::memcpy(s, h_s, bytes);
+    std::memcpy(c, h_c, bytes);
   }
 
   float *h_x = nullptr;
diff --git a/tests/test_gpu.cpp b/tests/test_gpu.cpp
index 6e06419..2fbd28f 100644
--- a/tests/test_gpu.cpp
+++ b/tests/test_gpu.cpp
@@ -3,8 +3,8 @@
 
 #include "test_utils.hpp"
 
-TEST_CASE("sinf") { test_sinf<GPUBackend>(1e-1f); }
+TEST_CASE("sinf") { test_sinf<GPUBackend>(1e-6f); }
 
-TEST_CASE("cosf") { test_cosf<GPUBackend>(1e-1f); }
+TEST_CASE("cosf") { test_cosf<GPUBackend>(1e-6f); }
 
-TEST_CASE("sincosf") { test_sincosf<GPUBackend>(1e-1f); }
+TEST_CASE("sincosf") { test_sincosf<GPUBackend>(1e-6f); }
diff --git a/tests/test_utils.hpp b/tests/test_utils.hpp
index e366bef..4447d65 100644
--- a/tests/test_utils.hpp
+++ b/tests/test_utils.hpp
@@ -9,12 +9,15 @@
 
 const size_t N = 1e7;
 
+inline void init_x(std::vector<float> &x) {
+  for (size_t i = 0; i < x.size(); ++i) {
+    x[i] = (i % 360) * 0.0174533f; // degrees to radians
+  }
+}
+
 template <typename Backend> inline void test_sinf(float tol) {
   std::vector<float> x(N), s_ref(N), s(N);
-
-  for (size_t i = 0; i < N; ++i) {
-    x[i] = float(i) * 0.01f;
-  }
+  init_x(x);
 
   ReferenceBackend ref;
   Backend backend;
@@ -30,10 +33,7 @@ template <typename Backend> inline void test_sinf(float tol) {
 
 template <typename Backend> inline void test_cosf(float tol) {
   std::vector<float> x(N), c_ref(N), c(N);
-
-  for (size_t i = 0; i < N; ++i) {
-    x[i] = float(i) * 0.01f;
-  }
+  init_x(x);
 
   ReferenceBackend ref;
   Backend backend;
@@ -49,10 +49,7 @@ template <typename Backend> inline void test_cosf(float tol) {
 
 template <typename Backend> inline void test_sincosf(float tol) {
   std::vector<float> x(N), s_ref(N), c_ref(N), s(N), c(N);
-
-  for (size_t i = 0; i < N; ++i) {
-    x[i] = float(i) * 0.01f;
-  }
+  init_x(x);
 
   ReferenceBackend ref;
   Backend backend;