Fix dx4 calculation in scalar remainder code and add null checks

- Fix dx4 computation in scalar remainder loops (should be dx2*dx2)
- Add missing null pointer check in benchmark_sinf for consistency

Co-authored-by: wvbbreu <185333235+wvbbreu@users.noreply.github.com>
2025-10-29 16:26:27 +00:00
parent 3addf2b05e
commit 807b9d5284
2 changed files with 6 additions and 3 deletions
--- a/benchmarks/benchmark_utils.hpp
+++ b/benchmarks/benchmark_utils.hpp
@@ -26,6 +26,9 @@ static void benchmark_sinf(benchmark::State &state) {
      reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
  float *s =
      reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
+  if (!x || !s) {
+    throw std::runtime_error("Buffer allocation failed");
+  }
  auto end = std::chrono::high_resolution_clock::now();
  state.counters["init_ms"] =
      std::chrono::duration_cast<std::chrono::microseconds>(end - start)
--- a/src/lookup_xsimd.cpp
+++ b/src/lookup_xsimd.cpp
@@ -78,7 +78,7 @@ template <std::size_t NR_SAMPLES> struct cosf_dispatcher {
      const float dx = a[i] - idx * lookup_table_.PI_FRAC;
      const float dx2 = dx * dx;
      const float dx3 = dx2 * dx;
-      const float dx4 = dx3 * dx;
+      const float dx4 = dx2 * dx2;
      const float cosdx =
          1.0f - lookup_table_.TERM2 * dx2 + lookup_table_.TERM4 * dx4;
      const float sindx = dx - lookup_table_.TERM3 * dx3;
@@ -138,7 +138,7 @@ template <std::size_t NR_SAMPLES> struct sinf_dispatcher {
      const float dx = a[i] - idx * lookup_table_.PI_FRAC;
      const float dx2 = dx * dx;
      const float dx3 = dx2 * dx;
-      const float dx4 = dx3 * dx;
+      const float dx4 = dx2 * dx2;
      const float cosdx =
          1.0f - lookup_table_.TERM2 * dx2 + lookup_table_.TERM4 * dx4;
      const float sindx = dx - lookup_table_.TERM3 * dx3;
@@ -202,7 +202,7 @@ template <std::size_t NR_SAMPLES> struct sin_cosf_dispatcher {
      const float dx = a[i] - idx * lookup_table_.PI_FRAC;
      const float dx2 = dx * dx;
      const float dx3 = dx2 * dx;
-      const float dx4 = dx3 * dx;
+      const float dx4 = dx2 * dx2;
      const float cosdx =
          1.0f - lookup_table_.TERM2 * dx2 + lookup_table_.TERM4 * dx4;
      const float sindx = dx - lookup_table_.TERM3 * dx3;