From 89d37a04aa9ba618bb1c6d3024d2262feb1ee09e Mon Sep 17 00:00:00 2001 From: Josh Bailey Date: Thu, 23 Nov 2023 03:33:10 +0000 Subject: [PATCH] not enough copies. --- lib/libvkfft.cc | 4 ++-- lib/vkfft_impl.cc | 4 ++-- lib/vkfft_short_impl.cc | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/libvkfft.cc b/lib/libvkfft.cc index 9a5df01b..eb925a68 100644 --- a/lib/libvkfft.cc +++ b/lib/libvkfft.cc @@ -117,10 +117,10 @@ VkFFTResult _transferDataToCPU(char *cpu_arr) { return VKFFT_ERROR_MALLOC_FAILED; if (_shift) { const size_t halfFftBufferSize = fftBufferSize / 2; - for (int i = 0; i < vkConfiguration.numberBatches; ++i) { + for (int i = 0; i < vkConfiguration.numberBatches; + ++i, cpu_arr += fftBufferSize, data += fftBufferSize) { memcpy(cpu_arr + halfFftBufferSize, data, halfFftBufferSize); memcpy(cpu_arr, data + halfFftBufferSize, halfFftBufferSize); - cpu_arr += fftBufferSize; } } else { memcpy(cpu_arr, data, stagingBufferSize); diff --git a/lib/vkfft_impl.cc b/lib/vkfft_impl.cc index 1b38172c..adba0464 100644 --- a/lib/vkfft_impl.cc +++ b/lib/vkfft_impl.cc @@ -230,9 +230,9 @@ int vkfft_impl::work(int noutput_items, gr_vector_const_void_star &input_items, const gr_complex *const in = reinterpret_cast(input_items[0]); gr_complex *const out = reinterpret_cast(output_items[0]); + size_t buffer_index = 0; - for (int i = 0; i < noutput_items; ++i) { - const int buffer_index = i * vlen_; + for (int i = 0; i < noutput_items; ++i, buffer_index += vlen_) { vkfft_offload((char *)&in[buffer_index], (char *)&out[buffer_index]); } diff --git a/lib/vkfft_short_impl.cc b/lib/vkfft_short_impl.cc index fbaad7ba..ca8a85d2 100644 --- a/lib/vkfft_short_impl.cc +++ b/lib/vkfft_short_impl.cc @@ -246,9 +246,9 @@ int vkfft_short_impl::work(int noutput_items, reinterpret_cast(input_items[0]); gr_complex *const out = reinterpret_cast(output_items[0]); auto *buffer = input_buffer_.get(); + size_t buffer_index = 0; - for (int i = 0; i < noutput_items; ++i) { - const int buffer_index = i * vlen_ * 2; + for (int i = 0; i < noutput_items; ++i, buffer_index += vlen_ * 2) { _converter->conv(&in[buffer_index], &buffer[0], vlen_); vkfft_offload((char *)&buffer[0], (char *)&out[buffer_index]); }