From 3addf2b05efda94cf71c4b1f1f39e42b138e10af Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Oct 2025 16:23:35 +0000 Subject: [PATCH] Fix memory management bug and computational inefficiencies - Fix mismatched new[]/free() to use proper delete[] in interface.hpp - Fix incorrect dx4 calculation (should be dx2*dx2, not dx2*dx) in lookup_xsimd.cpp - Fix redundant sinv calculation by using separate base variables in sin_cosf_dispatcher - Remove unused variable declarations in lookup_avx.cpp - Optimize reference backend to use sincosf() instead of separate sin/cos calls Co-authored-by: wvbbreu <185333235+wvbbreu@users.noreply.github.com> --- include/trigdx/interface.hpp | 2 +- src/lookup_avx.cpp | 3 +-- src/lookup_xsimd.cpp | 14 +++++++------- src/reference.cpp | 3 +-- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/include/trigdx/interface.hpp b/include/trigdx/interface.hpp index 95a2575..2a33412 100644 --- a/include/trigdx/interface.hpp +++ b/include/trigdx/interface.hpp @@ -16,7 +16,7 @@ public: return static_cast(new uint8_t[bytes]); }; - virtual void free_memory(void *ptr) const { std::free(ptr); }; + virtual void free_memory(void *ptr) const { delete[] static_cast(ptr); }; // Compute sine for n elements virtual void compute_sinf(size_t n, const float *x, float *s) const = 0; diff --git a/src/lookup_avx.cpp b/src/lookup_avx.cpp index b8debbb..fe4e884 100644 --- a/src/lookup_avx.cpp +++ b/src/lookup_avx.cpp @@ -89,7 +89,6 @@ template struct LookupAVXBackend::Impl { constexpr std::size_t VL = 8; // AVX processes 8 floats const __m256 scale = _mm256_set1_ps(SCALE); const __m256i mask = _mm256_set1_epi32(MASK); - const __m256i quarter_pi = _mm256_set1_epi32(NR_SAMPLES / 4); std::size_t i = 0; for (; i + VL <= n; i += VL) { @@ -104,7 +103,7 @@ template struct LookupAVXBackend::Impl { #else // fallback gather for AVX1 float sin_tmp[VL]; - int idx_a[VL], idxc_a[VL]; + int idx_a[VL]; _mm256_store_si256((__m256i *)idx_a, idx); for (std::size_t k = 0; k < VL; ++k) { sin_tmp[k] = lookup[idx_a[k]]; diff --git a/src/lookup_xsimd.cpp b/src/lookup_xsimd.cpp index f40b933..ab9ba65 100644 --- a/src/lookup_xsimd.cpp +++ b/src/lookup_xsimd.cpp @@ -56,7 +56,7 @@ template struct cosf_dispatcher { const b_type dx = xsimd::sub(vx, xsimd::mul(f_idx, pi_frac)); const b_type dx2 = xsimd::mul(dx, dx); const b_type dx3 = xsimd::mul(dx2, dx); - const b_type dx4 = xsimd::mul(dx2, dx); + const b_type dx4 = xsimd::mul(dx2, dx2); const b_type t2 = xsimd::mul(dx2, term2); const b_type t3 = xsimd::mul(dx3, term3); const b_type t4 = xsimd::mul(dx4, term4); @@ -115,7 +115,7 @@ template struct sinf_dispatcher { const b_type dx = xsimd::sub(vx, xsimd::mul(f_idx, pi_frac)); const b_type dx2 = xsimd::mul(dx, dx); const b_type dx3 = xsimd::mul(dx2, dx); - const b_type dx4 = xsimd::mul(dx2, dx); + const b_type dx4 = xsimd::mul(dx2, dx2); const b_type t2 = xsimd::mul(dx2, term2); const b_type t3 = xsimd::mul(dx3, term3); const b_type t4 = xsimd::mul(dx4, term4); @@ -176,20 +176,20 @@ template struct sin_cosf_dispatcher { const b_type dx = xsimd::sub(vx, xsimd::mul(f_idx, pi_frac)); const b_type dx2 = xsimd::mul(dx, dx); const b_type dx3 = xsimd::mul(dx2, dx); - const b_type dx4 = xsimd::mul(dx2, dx); + const b_type dx4 = xsimd::mul(dx2, dx2); const b_type t2 = xsimd::mul(dx2, term2); const b_type t3 = xsimd::mul(dx3, term3); const b_type t4 = xsimd::mul(dx4, term4); idx = xsimd::bitwise_and(idx, mask); - b_type sinv = b_type::gather(lookup_table_.sin_values.data(), idx); - b_type cosv = b_type::gather(lookup_table_.cos_values.data(), idx); + const b_type sinv_base = b_type::gather(lookup_table_.sin_values.data(), idx); + const b_type cosv_base = b_type::gather(lookup_table_.cos_values.data(), idx); const b_type cosdx = xsimd::add(xsimd::sub(term1, t2), t4); const b_type sindx = xsimd::sub(dx, t3); - sinv = xsimd::add(xsimd::mul(cosv, sindx), xsimd::mul(sinv, cosdx)); - cosv = xsimd::sub(xsimd::mul(cosv, cosdx), xsimd::mul(sinv, sindx)); + b_type sinv = xsimd::add(xsimd::mul(cosv_base, sindx), xsimd::mul(sinv_base, cosdx)); + b_type cosv = xsimd::sub(xsimd::mul(cosv_base, cosdx), xsimd::mul(sinv_base, sindx)); sinv.store(s + i, Tag()); cosv.store(c + i, Tag()); diff --git a/src/reference.cpp b/src/reference.cpp index d8be5a1..bbaa7e4 100644 --- a/src/reference.cpp +++ b/src/reference.cpp @@ -17,7 +17,6 @@ void ReferenceBackend::compute_cosf(size_t n, const float *x, float *c) const { void ReferenceBackend::compute_sincosf(size_t n, const float *x, float *s, float *c) const { for (size_t i = 0; i < n; ++i) { - s[i] = sinf(x[i]); - c[i] = cosf(x[i]); + sincosf(x[i], &s[i], &c[i]); } }