Add allocate_memory and free_memory
This commit is contained in:
@@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
#include <benchmark/benchmark.h>
|
#include <benchmark/benchmark.h>
|
||||||
|
|
||||||
void init_x(std::vector<float> &x) {
|
void init_x(float *x, size_t n) {
|
||||||
for (size_t i = 0; i < x.size(); ++i) {
|
for (size_t i = 0; i < n; ++i) {
|
||||||
x[i] = (i % 360) * 0.0174533f; // degrees to radians
|
x[i] = (i % 360) * 0.0174533f; // degrees to radians
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -16,24 +16,31 @@ void init_x(std::vector<float> &x) {
|
|||||||
template <typename Backend>
|
template <typename Backend>
|
||||||
static void benchmark_sinf(benchmark::State &state) {
|
static void benchmark_sinf(benchmark::State &state) {
|
||||||
const size_t N = static_cast<size_t>(state.range(0));
|
const size_t N = static_cast<size_t>(state.range(0));
|
||||||
std::vector<float> x(N), s(N);
|
|
||||||
init_x(x);
|
|
||||||
|
|
||||||
Backend backend;
|
Backend backend;
|
||||||
|
|
||||||
auto start = std::chrono::high_resolution_clock::now();
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
backend.init(N);
|
backend.init(N);
|
||||||
|
float *x =
|
||||||
|
reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
|
||||||
|
float *s =
|
||||||
|
reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
|
||||||
auto end = std::chrono::high_resolution_clock::now();
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
state.counters["init_ms"] =
|
state.counters["init_ms"] =
|
||||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||||
.count() /
|
.count() /
|
||||||
1.e3;
|
1.e3;
|
||||||
|
|
||||||
|
init_x(x, N);
|
||||||
|
|
||||||
for (auto _ : state) {
|
for (auto _ : state) {
|
||||||
backend.compute_sinf(N, x.data(), s.data());
|
backend.compute_sinf(N, x, s);
|
||||||
benchmark::DoNotOptimize(s);
|
benchmark::DoNotOptimize(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
backend.free_memory(x);
|
||||||
|
backend.free_memory(s);
|
||||||
|
|
||||||
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
||||||
static_cast<int64_t>(N));
|
static_cast<int64_t>(N));
|
||||||
}
|
}
|
||||||
@@ -41,24 +48,31 @@ static void benchmark_sinf(benchmark::State &state) {
|
|||||||
template <typename Backend>
|
template <typename Backend>
|
||||||
static void benchmark_cosf(benchmark::State &state) {
|
static void benchmark_cosf(benchmark::State &state) {
|
||||||
const size_t N = static_cast<size_t>(state.range(0));
|
const size_t N = static_cast<size_t>(state.range(0));
|
||||||
std::vector<float> x(N), c(N);
|
|
||||||
init_x(x);
|
|
||||||
|
|
||||||
Backend backend;
|
Backend backend;
|
||||||
|
|
||||||
auto start = std::chrono::high_resolution_clock::now();
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
backend.init(N);
|
backend.init(N);
|
||||||
|
float *x =
|
||||||
|
reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
|
||||||
|
float *c =
|
||||||
|
reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
|
||||||
auto end = std::chrono::high_resolution_clock::now();
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
state.counters["init_ms"] =
|
state.counters["init_ms"] =
|
||||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||||
.count() /
|
.count() /
|
||||||
1.e3;
|
1.e3;
|
||||||
|
|
||||||
|
init_x(x, N);
|
||||||
|
|
||||||
for (auto _ : state) {
|
for (auto _ : state) {
|
||||||
backend.compute_cosf(N, x.data(), c.data());
|
backend.compute_cosf(N, x, c);
|
||||||
benchmark::DoNotOptimize(c);
|
benchmark::DoNotOptimize(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
backend.free_memory(x);
|
||||||
|
backend.free_memory(c);
|
||||||
|
|
||||||
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
||||||
static_cast<int64_t>(N));
|
static_cast<int64_t>(N));
|
||||||
}
|
}
|
||||||
@@ -66,25 +80,35 @@ static void benchmark_cosf(benchmark::State &state) {
|
|||||||
template <typename Backend>
|
template <typename Backend>
|
||||||
static void benchmark_sincosf(benchmark::State &state) {
|
static void benchmark_sincosf(benchmark::State &state) {
|
||||||
const size_t N = static_cast<size_t>(state.range(0));
|
const size_t N = static_cast<size_t>(state.range(0));
|
||||||
std::vector<float> x(N), s(N), c(N);
|
|
||||||
init_x(x);
|
|
||||||
|
|
||||||
Backend backend;
|
Backend backend;
|
||||||
|
|
||||||
auto start = std::chrono::high_resolution_clock::now();
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
backend.init(N);
|
backend.init(N);
|
||||||
|
float *x =
|
||||||
|
reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
|
||||||
|
float *s =
|
||||||
|
reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
|
||||||
|
float *c =
|
||||||
|
reinterpret_cast<float *>(backend.allocate_memory(N * sizeof(float)));
|
||||||
auto end = std::chrono::high_resolution_clock::now();
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
state.counters["init_ms"] =
|
state.counters["init_ms"] =
|
||||||
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
|
||||||
.count() /
|
.count() /
|
||||||
1.e3;
|
1.e3;
|
||||||
|
|
||||||
|
init_x(x, N);
|
||||||
|
|
||||||
for (auto _ : state) {
|
for (auto _ : state) {
|
||||||
backend.compute_sincosf(N, x.data(), s.data(), c.data());
|
backend.compute_sincosf(N, x, s, c);
|
||||||
benchmark::DoNotOptimize(s);
|
benchmark::DoNotOptimize(s);
|
||||||
benchmark::DoNotOptimize(c);
|
benchmark::DoNotOptimize(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
backend.free_memory(x);
|
||||||
|
backend.free_memory(s);
|
||||||
|
backend.free_memory(c);
|
||||||
|
|
||||||
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
state.SetItemsProcessed(static_cast<int64_t>(state.iterations()) *
|
||||||
static_cast<int64_t>(N));
|
static_cast<int64_t>(N));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ public:
|
|||||||
~GPUBackend() override;
|
~GPUBackend() override;
|
||||||
|
|
||||||
void init(size_t n = 0) override;
|
void init(size_t n = 0) override;
|
||||||
|
void *allocate_memory(size_t bytes) const override;
|
||||||
|
void free_memory(void *ptr) const override;
|
||||||
void compute_sinf(size_t n, const float *x, float *s) const override;
|
void compute_sinf(size_t n, const float *x, float *s) const override;
|
||||||
void compute_cosf(size_t n, const float *x, float *c) const override;
|
void compute_cosf(size_t n, const float *x, float *c) const override;
|
||||||
void compute_sincosf(size_t n, const float *x, float *s,
|
void compute_sincosf(size_t n, const float *x, float *s,
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
// Base interface for all math backends
|
// Base interface for all math backends
|
||||||
class Backend {
|
class Backend {
|
||||||
@@ -10,6 +11,12 @@ public:
|
|||||||
// Optional initialization
|
// Optional initialization
|
||||||
virtual void init(size_t n = 0) {}
|
virtual void init(size_t n = 0) {}
|
||||||
|
|
||||||
|
// Allocate `bytes` bytes of memory for use with this backend.
// The default implementation forwards to std::malloc and returns its result
// unchanged, so a null pointer is returned when std::malloc fails.
// Release the returned pointer with free_memory().
virtual void *allocate_memory(size_t bytes) const {
  return std::malloc(bytes);
}
|
||||||
|
|
||||||
|
// Release memory previously returned by allocate_memory().
// The default implementation forwards `ptr` to std::free.
virtual void free_memory(void *ptr) const { std::free(ptr); }
|
||||||
|
|
||||||
// Compute sine for n elements
|
// Compute sine for n elements
|
||||||
virtual void compute_sinf(size_t n, const float *x, float *s) const = 0;
|
virtual void compute_sinf(size_t n, const float *x, float *s) const = 0;
|
||||||
|
|
||||||
|
|||||||
16
src/gpu.cpp
16
src/gpu.cpp
@@ -19,9 +19,17 @@ struct GPUBackend::Impl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allocate `bytes` bytes with cudaMallocHost and return the resulting pointer.
// Returns nullptr when the allocation fails, so callers never receive an
// indeterminate pointer.
void *allocate_memory(size_t bytes) const {
  void *ptr = nullptr;
  // cudaMallocHost reports failure through a non-zero cudaError_t
  // (cudaSuccess is 0); on failure, do not trust whatever is left in `ptr`.
  if (cudaMallocHost(&ptr, bytes) != 0) {
    return nullptr;
  }
  return ptr;
}
|
||||||
|
|
||||||
|
// Release memory with cudaFreeHost.
// A null pointer is ignored, matching the std::free-based default in Backend.
// The cudaFreeHost status is not reported because this function returns void.
void free_memory(void *ptr) const {
  if (ptr == nullptr) {
    return;
  }
  cudaFreeHost(ptr);
}
|
||||||
|
|
||||||
void init(size_t n) {
|
void init(size_t n) {
|
||||||
const size_t bytes = n * sizeof(float);
|
const size_t bytes = n * sizeof(float);
|
||||||
cudaMallocHost(&h_x, bytes);
|
h_x = reinterpret_cast<float *>(allocate_memory(bytes));
|
||||||
cudaMalloc(&d_x, bytes);
|
cudaMalloc(&d_x, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -71,6 +79,12 @@ GPUBackend::~GPUBackend() = default;
|
|||||||
|
|
||||||
void GPUBackend::init(size_t n) { impl->init(n); }
|
void GPUBackend::init(size_t n) { impl->init(n); }
|
||||||
|
|
||||||
|
void *GPUBackend::allocate_memory(size_t bytes) const {
|
||||||
|
return impl->allocate_memory(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward `ptr` to the implementation object for release.
void GPUBackend::free_memory(void *ptr) const {
  impl->free_memory(ptr);
}
|
||||||
|
|
||||||
void GPUBackend::compute_sinf(size_t n, const float *x, float *s) const {
|
void GPUBackend::compute_sinf(size_t n, const float *x, float *s) const {
|
||||||
impl->compute_sinf(n, x, s);
|
impl->compute_sinf(n, x, s);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user