From 3176ba11d864eda976ac7ba4807b7134e83949f7 Mon Sep 17 00:00:00 2001 From: morris Date: Tue, 15 Jun 2021 19:14:15 +0800 Subject: [PATCH 1/3] gdma: update DMA soc data for esp32-s3 --- components/driver/include/esp_private/gdma.h | 3 +- components/hal/esp32s3/include/hal/gdma_ll.h | 38 ++++--------------- components/hal/include/hal/dma_types.h | 8 +++- .../esp_crypto_shared_gdma.c | 15 +++----- .../soc/esp32s3/include/soc/gdma_channel.h | 1 + components/soc/esp32s3/include/soc/soc_caps.h | 4 +- 6 files changed, 25 insertions(+), 44 deletions(-) diff --git a/components/driver/include/esp_private/gdma.h b/components/driver/include/esp_private/gdma.h index 5b548e0868e..43973900e5d 100644 --- a/components/driver/include/esp_private/gdma.h +++ b/components/driver/include/esp_private/gdma.h @@ -38,7 +38,8 @@ typedef enum { GDMA_TRIG_PERIPH_ADC, /*!< GDMA trigger peripheral: ADC */ GDMA_TRIG_PERIPH_DAC, /*!< GDMA trigger peripheral: DAC */ GDMA_TRIG_PERIPH_LCD, /*!< GDMA trigger peripheral: LCD */ - GDMA_TRIG_PERIPH_CAM /*!< GDMA trigger peripheral: CAM */ + GDMA_TRIG_PERIPH_CAM, /*!< GDMA trigger peripheral: CAM */ + GDMA_TRIG_PERIPH_RMT, /*!< GDMA trigger peripheral: RMT */ } gdma_trigger_peripheral_t; /** diff --git a/components/hal/esp32s3/include/hal/gdma_ll.h b/components/hal/esp32s3/include/hal/gdma_ll.h index 7709cc12359..194081dbb9d 100644 --- a/components/hal/esp32s3/include/hal/gdma_ll.h +++ b/components/hal/esp32s3/include/hal/gdma_ll.h @@ -48,9 +48,12 @@ extern "C" { #define GDMA_LL_EVENT_RX_SUC_EOF (1<<1) #define GDMA_LL_EVENT_RX_DONE (1<<0) -/* Memory block size value supported by TX channel */ -#define GDMA_LL_OUT_EXT_MEM_BK_SIZE_16B (0) -#define GDMA_LL_OUT_EXT_MEM_BK_SIZE_32B (1) +#define GDMA_LL_L2FIFO_BASE_SIZE (16) // Basic size of GDMA Level 2 FIFO + +/* Memory block size value supported by channel */ +#define GDMA_LL_EXT_MEM_BK_SIZE_16B (0) +#define GDMA_LL_EXT_MEM_BK_SIZE_32B (1) +#define GDMA_LL_EXT_MEM_BK_SIZE_64B (2) ///////////////////////////////////// Common ///////////////////////////////////////// /** @@ -146,7 +149,7 @@ static inline void gdma_ll_rx_reset_channel(gdma_dev_t *dev, uint32_t channel) /** * @brief Set DMA RX channel memory block size - * @param size_index Supported value: GDMA_IN_EXT_MEM_BK_SIZE_16B, GDMA_IN_EXT_MEM_BK_SIZE_32B + * @param size_index Supported value: GDMA_LL_EXT_MEM_BK_SIZE_16B/32B/64B */ static inline void gdma_ll_rx_set_block_size_psram(gdma_dev_t *dev, uint32_t channel, uint32_t size_index) { @@ -300,19 +303,6 @@ static inline void gdma_ll_rx_connect_to_periph(gdma_dev_t *dev, uint32_t channe dev->channel[channel].in.peri_sel.sel = periph_id; } -/** - * @brief Extend the L2 FIFO size for RX channel - * @note By default, the L2 FIFO size is SOC_GDMA_L2_FIFO_BASE_SIZE Bytes. Suggest to extend it to twice the block size when accessing PSRAM. 
- * @note `size_in_bytes` should aligned to 8 and larger than SOC_GDMA_L2_FIFO_BASE_SIZE - */ -static inline void gdma_ll_rx_extend_l2_fifo_size_to(gdma_dev_t *dev, uint32_t channel, uint32_t size_in_bytes) -{ - if (size_in_bytes > SOC_GDMA_L2_FIFO_BASE_SIZE) { - dev->sram_size[channel].in.in_size = (size_in_bytes - SOC_GDMA_L2_FIFO_BASE_SIZE) / 8; - } -} - - ///////////////////////////////////// TX ///////////////////////////////////////// /** * @brief Get DMA TX channel interrupt status word @@ -401,7 +391,7 @@ static inline void gdma_ll_tx_reset_channel(gdma_dev_t *dev, uint32_t channel) /** * @brief Set DMA TX channel memory block size - * @param size_index Supported value: GDMA_OUT_EXT_MEM_BK_SIZE_16B, GDMA_OUT_EXT_MEM_BK_SIZE_32B + * @param size_index Supported value: GDMA_LL_EXT_MEM_BK_SIZE_16B/32B/64B */ static inline void gdma_ll_tx_set_block_size_psram(gdma_dev_t *dev, uint32_t channel, uint32_t size_index) { @@ -531,18 +521,6 @@ static inline void gdma_ll_tx_connect_to_periph(gdma_dev_t *dev, uint32_t channe dev->channel[channel].out.peri_sel.sel = periph_id; } -/** - * @brief Extend the L2 FIFO size for TX channel - * @note By default, the L2 FIFO size is SOC_GDMA_L2_FIFO_BASE_SIZE Bytes. Suggest to extend it to twice the block size when accessing PSRAM. - * @note `size_in_bytes` should aligned to 8 and larger than SOC_GDMA_L2_FIFO_BASE_SIZE - */ -static inline void gdma_ll_tx_extend_fifo_size_to(gdma_dev_t *dev, uint32_t channel, uint32_t size_in_bytes) -{ - if (size_in_bytes > SOC_GDMA_L2_FIFO_BASE_SIZE) { - dev->sram_size[channel].out.out_size = (size_in_bytes - SOC_GDMA_L2_FIFO_BASE_SIZE) / 8; - } -} - #ifdef __cplusplus } #endif diff --git a/components/hal/include/hal/dma_types.h b/components/hal/include/hal/dma_types.h index 7583bf6f0f2..66c8677e98f 100644 --- a/components/hal/include/hal/dma_types.h +++ b/components/hal/include/hal/dma_types.h @@ -14,12 +14,12 @@ #pragma once +#include + #ifdef __cplusplus extern "C" { #endif -#include - /** * @brief Type of DMA descriptor * @@ -43,3 +43,7 @@ _Static_assert(sizeof(dma_descriptor_t) == 12, "dma_descriptor_t should occupy 1 #define DMA_DESCRIPTOR_BUFFER_OWNER_CPU (0) /*!< DMA buffer is allowed to be accessed by CPU */ #define DMA_DESCRIPTOR_BUFFER_OWNER_DMA (1) /*!< DMA buffer is allowed to be accessed by DMA engine */ #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE (4095) /*!< Maximum size of the buffer that can be attached to descriptor */ + +#ifdef __cplusplus +} +#endif diff --git a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c index fb209d72f55..81287998123 100644 --- a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c +++ b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c @@ -54,7 +54,7 @@ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config } -#if SOC_GDMA_SUPPORT_EXTMEM +#if SOC_GDMA_SUPPORT_PSRAM /* Initialize external memory specific DMA configs */ static void esp_crypto_shared_dma_init_extmem(void) { @@ -64,13 +64,10 @@ static void esp_crypto_shared_dma_init_extmem(void) gdma_get_channel_id(tx_channel, &tx_ch_id); gdma_get_channel_id(rx_channel, &rx_ch_id); - /* An L2 FIFO bigger than 40 bytes is need when accessing external ram */ - gdma_ll_tx_extend_fifo_size_to(&GDMA, tx_ch_id, 40); - gdma_ll_rx_extend_l2_fifo_size_to(&GDMA, rx_ch_id, 40); - gdma_ll_tx_set_block_size_psram(&GDMA, tx_ch_id, GDMA_LL_OUT_EXT_MEM_BK_SIZE_16B); - gdma_ll_rx_set_block_size_psram(&GDMA, 
rx_ch_id, GDMA_LL_OUT_EXT_MEM_BK_SIZE_16B); + gdma_ll_tx_set_block_size_psram(&GDMA, tx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B); + gdma_ll_rx_set_block_size_psram(&GDMA, rx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B); } -#endif //SOC_GDMA_SUPPORT_EXTMEM +#endif //SOC_GDMA_SUPPORT_PSRAM /* Initialize GDMA module and channels */ static esp_err_t crypto_shared_gdma_init(void) @@ -96,9 +93,9 @@ static esp_err_t crypto_shared_gdma_init(void) goto err; } -#if SOC_GDMA_SUPPORT_EXTMEM +#if SOC_GDMA_SUPPORT_PSRAM esp_crypto_shared_dma_init_extmem(); -#endif //SOC_GDMA_SUPPORT_EXTMEM +#endif //SOC_GDMA_SUPPORT_PSRAM gdma_connect(rx_channel, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_AES, 0)); gdma_connect(tx_channel, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_AES, 0)); diff --git a/components/soc/esp32s3/include/soc/gdma_channel.h b/components/soc/esp32s3/include/soc/gdma_channel.h index d9c820d512a..2aaf59c35ab 100644 --- a/components/soc/esp32s3/include/soc/gdma_channel.h +++ b/components/soc/esp32s3/include/soc/gdma_channel.h @@ -27,3 +27,4 @@ #define SOC_GDMA_TRIG_PERIPH_SHA0 (7) #define SOC_GDMA_TRIG_PERIPH_ADC0 (8) #define SOC_GDMA_TRIG_PERIPH_DAC0 (8) +#define SOC_GDMA_TRIG_PERIPH_RMT0 (9) diff --git a/components/soc/esp32s3/include/soc/soc_caps.h b/components/soc/esp32s3/include/soc/soc_caps.h index 8e39d4bc763..8a927216216 100644 --- a/components/soc/esp32s3/include/soc/soc_caps.h +++ b/components/soc/esp32s3/include/soc/soc_caps.h @@ -44,8 +44,8 @@ /*-------------------------- GDMA CAPS ---------------------------------------*/ #define SOC_GDMA_GROUPS (1) // Number of GDMA groups #define SOC_GDMA_PAIRS_PER_GROUP (5) // Number of GDMA pairs in each group -#define SOC_GDMA_L2_FIFO_BASE_SIZE (16) // Basic size of GDMA Level 2 FIFO -#define SOC_GDMA_SUPPORT_EXTMEM (1) // GDMA can access external PSRAM +#define SOC_GDMA_SUPPORT_PSRAM (1) // GDMA can access external PSRAM +#define SOC_GDMA_PSRAM_MIN_ALIGN (16) // Minimal alignment for PSRAM transaction /*-------------------------- GPIO CAPS ---------------------------------------*/ #include "gpio_caps.h" From d31b1f79e6ee33c938d28397af1d29f353650262 Mon Sep 17 00:00:00 2001 From: morris Date: Wed, 23 Jun 2021 15:11:58 +0800 Subject: [PATCH 2/3] async_mcp: apply general esp_check macros --- components/esp_hw_support/esp_async_memcpy.c | 45 +++++++------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/components/esp_hw_support/esp_async_memcpy.c b/components/esp_hw_support/esp_async_memcpy.c index 38848faad94..326a5f4f78e 100644 --- a/components/esp_hw_support/esp_async_memcpy.c +++ b/components/esp_hw_support/esp_async_memcpy.c @@ -14,7 +14,7 @@ #include "freertos/FreeRTOS.h" #include "freertos/semphr.h" #include "hal/dma_types.h" -#include "esp_compiler.h" +#include "esp_check.h" #include "esp_heap_caps.h" #include "esp_log.h" #include "esp_async_memcpy.h" @@ -22,17 +22,6 @@ static const char *TAG = "async_memcpy"; -#define ASMCP_CHECK(a, msg, tag, ret, ...) 
\ - do \ - { \ - if (unlikely(!(a))) \ - { \ - ESP_LOGE(TAG, "%s(%d): " msg, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ - ret_code = ret; \ - goto tag; \ - } \ - } while (0) - /** * @brief Type of async mcp stream * mcp stream inherits DMA descriptor, besides that, it has a callback function member @@ -62,17 +51,17 @@ typedef struct async_memcpy_context_t { esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_memcpy_t *asmcp) { - esp_err_t ret_code = ESP_OK; + esp_err_t ret = ESP_OK; async_memcpy_context_t *mcp_hdl = NULL; - ASMCP_CHECK(config, "configuration can't be null", err, ESP_ERR_INVALID_ARG); - ASMCP_CHECK(asmcp, "can't assign mcp handle to null", err, ESP_ERR_INVALID_ARG); + ESP_GOTO_ON_FALSE(config, ESP_ERR_INVALID_ARG, err, TAG, "configuration can't be null"); + ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "can't assign mcp handle to null"); // context memory size + stream pool size size_t total_malloc_size = sizeof(async_memcpy_context_t) + sizeof(async_memcpy_stream_t) * config->backlog * 2; // to work when cache is disabled, the driver handle should located in SRAM mcp_hdl = heap_caps_calloc(1, total_malloc_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - ASMCP_CHECK(mcp_hdl, "allocate context memory failed", err, ESP_ERR_NO_MEM); + ESP_GOTO_ON_FALSE(mcp_hdl, ESP_ERR_NO_MEM, err, TAG, "allocate context memory failed"); mcp_hdl->flags = config->flags; mcp_hdl->out_streams = mcp_hdl->streams_pool; @@ -109,20 +98,19 @@ esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_me if (asmcp) { *asmcp = NULL; } - return ret_code; + return ret; } esp_err_t esp_async_memcpy_uninstall(async_memcpy_t asmcp) { - esp_err_t ret_code = ESP_OK; - ASMCP_CHECK(asmcp, "mcp handle can't be null", err, ESP_ERR_INVALID_ARG); + esp_err_t ret = ESP_OK; + ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "mcp handle can't be null"); async_memcpy_impl_stop(&asmcp->mcp_impl); async_memcpy_impl_deinit(&asmcp->mcp_impl); free(asmcp); - return ESP_OK; err: - return ret_code; + return ret; } static int async_memcpy_prepare_receive(async_memcpy_t asmcp, void *buffer, size_t size, dma_descriptor_t **start_desc, dma_descriptor_t **end_desc) @@ -226,16 +214,16 @@ static bool async_memcpy_get_next_rx_descriptor(async_memcpy_t asmcp, dma_descri esp_err_t esp_async_memcpy(async_memcpy_t asmcp, void *dst, void *src, size_t n, async_memcpy_isr_cb_t cb_isr, void *cb_args) { - esp_err_t ret_code = ESP_OK; + esp_err_t ret = ESP_OK; dma_descriptor_t *rx_start_desc = NULL; dma_descriptor_t *rx_end_desc = NULL; dma_descriptor_t *tx_start_desc = NULL; dma_descriptor_t *tx_end_desc = NULL; size_t rx_prepared_size = 0; size_t tx_prepared_size = 0; - ASMCP_CHECK(asmcp, "mcp handle can't be null", err, ESP_ERR_INVALID_ARG); - ASMCP_CHECK(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), "buffer address not valid", err, ESP_ERR_INVALID_ARG); - ASMCP_CHECK(n <= DMA_DESCRIPTOR_BUFFER_MAX_SIZE * asmcp->max_stream_num, "buffer size too large", err, ESP_ERR_INVALID_ARG); + ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "mcp handle can't be null"); + ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid"); + ESP_GOTO_ON_FALSE(n <= DMA_DESCRIPTOR_BUFFER_MAX_SIZE * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large"); // Prepare TX and RX descriptor portENTER_CRITICAL_SAFE(&asmcp->spinlock); @@ -268,12 +256,11 
@@ esp_err_t esp_async_memcpy(async_memcpy_t asmcp, void *dst, void *src, size_t n, // It's unlikely that we have space for rx descriptor but no space for tx descriptor // Both tx and rx descriptor should move in the same pace - ASMCP_CHECK(rx_prepared_size == n, "out of rx descriptor", err, ESP_FAIL); - ASMCP_CHECK(tx_prepared_size == n, "out of tx descriptor", err, ESP_FAIL); + ESP_GOTO_ON_FALSE(rx_prepared_size == n, ESP_FAIL, err, TAG, "out of rx descriptor"); + ESP_GOTO_ON_FALSE(tx_prepared_size == n, ESP_FAIL, err, TAG, "out of tx descriptor"); - return ESP_OK; err: - return ret_code; + return ret; } IRAM_ATTR void async_memcpy_isr_on_rx_done_event(async_memcpy_impl_t *impl) From d9819bc7aef8de1b0cdc29bb6b9acaa633ec31b1 Mon Sep 17 00:00:00 2001 From: morris Date: Wed, 23 Jun 2021 14:10:07 +0800 Subject: [PATCH 3/3] gdma: set transfer ability --- components/driver/gdma.c | 63 +++++ components/driver/include/esp_private/gdma.h | 30 ++- components/esp_hw_support/esp_async_memcpy.c | 44 ++- .../esp_hw_support/include/esp_async_memcpy.h | 14 +- .../port/async_memcpy_impl_gdma.c | 26 +- .../port/include/esp_async_memcpy_impl.h | 2 + .../esp_hw_support/test/test_async_memcpy.c | 255 +++++++++++++----- .../esp_crypto_shared_gdma.c | 18 +- docs/en/api-reference/system/async_memcpy.rst | 2 + .../test_utils/include/test_utils.h | 2 +- 10 files changed, 360 insertions(+), 96 deletions(-) diff --git a/components/driver/gdma.c b/components/driver/gdma.c index b4cc76c1f97..7e64c15ed35 100644 --- a/components/driver/gdma.c +++ b/components/driver/gdma.c @@ -74,6 +74,8 @@ struct gdma_channel_t { intr_handle_t intr; // per-channel interrupt handle gdma_channel_direction_t direction; // channel direction int periph_id; // Peripheral instance ID, indicates which peripheral is connected to this GDMA channel + size_t sram_alignment; // alignment for memory in SRAM + size_t psram_alignment; // alignment for memory in PSRAM esp_err_t (*del)(gdma_channel_t *channel); // channel deletion function, it's polymorphic, see `gdma_del_tx_channel` or `gdma_del_rx_channel` }; @@ -271,6 +273,67 @@ esp_err_t gdma_disconnect(gdma_channel_handle_t dma_chan) return ret; } +esp_err_t gdma_set_transfer_ability(gdma_channel_handle_t dma_chan, const gdma_transfer_ability_t *ability) +{ + esp_err_t ret = ESP_OK; + gdma_pair_t *pair = NULL; + gdma_group_t *group = NULL; + bool en_burst = true; + ESP_GOTO_ON_FALSE(dma_chan, ESP_ERR_INVALID_ARG, err, TAG, "invalid argument"); + pair = dma_chan->pair; + group = pair->group; + size_t sram_alignment = ability->sram_trans_align; + size_t psram_alignment = ability->psram_trans_align; + // alignment should be 2^n + ESP_GOTO_ON_FALSE((sram_alignment & (sram_alignment - 1)) == 0, ESP_ERR_INVALID_ARG, err, TAG, "invalid sram alignment: %zu", sram_alignment); + +#if SOC_GDMA_SUPPORT_PSRAM + int block_size_index = 0; + switch (psram_alignment) { + case 64: // 64 Bytes alignment + block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_64B; + break; + case 32: // 32 Bytes alignment + block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_32B; + break; + case 16: // 16 Bytes alignment + block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_16B; + break; + case 0: // no alignment is requirement + block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_16B; + psram_alignment = SOC_GDMA_PSRAM_MIN_ALIGN; // fall back to minimal alignment + break; + default: + ESP_GOTO_ON_FALSE(false, ESP_ERR_INVALID_ARG, err, TAG, "invalid psram alignment: %zu", psram_alignment); + break; + } +#endif // #if SOC_GDMA_SUPPORT_PSRAM + + if (dma_chan->direction == 
GDMA_CHANNEL_DIRECTION_TX) { + // TX channel can always enable burst mode, no matter data alignment + gdma_ll_tx_enable_data_burst(group->hal.dev, pair->pair_id, true); + gdma_ll_tx_enable_descriptor_burst(group->hal.dev, pair->pair_id, true); +#if SOC_GDMA_SUPPORT_PSRAM + gdma_ll_tx_set_block_size_psram(group->hal.dev, pair->pair_id, block_size_index); +#endif // #if SOC_GDMA_SUPPORT_PSRAM + } else { + // RX channel burst mode depends on specific data alignment + en_burst = sram_alignment >= 4; + gdma_ll_rx_enable_data_burst(group->hal.dev, pair->pair_id, en_burst); + gdma_ll_rx_enable_descriptor_burst(group->hal.dev, pair->pair_id, en_burst); +#if SOC_GDMA_SUPPORT_PSRAM + gdma_ll_rx_set_block_size_psram(group->hal.dev, pair->pair_id, block_size_index); +#endif // #if SOC_GDMA_SUPPORT_PSRAM + } + + dma_chan->sram_alignment = sram_alignment; + dma_chan->psram_alignment = psram_alignment; + ESP_LOGD(TAG, "%s channel (%d,%d), (%zu:%zu) bytes aligned, burst %s", dma_chan->direction == GDMA_CHANNEL_DIRECTION_TX ? "tx" : "rx", + group->group_id, pair->pair_id, sram_alignment, psram_alignment, en_burst ? "enabled" : "disabled"); +err: + return ret; +} + esp_err_t gdma_apply_strategy(gdma_channel_handle_t dma_chan, const gdma_strategy_config_t *config) { esp_err_t ret = ESP_OK; diff --git a/components/driver/include/esp_private/gdma.h b/components/driver/include/esp_private/gdma.h index 43973900e5d..88a45b56cfe 100644 --- a/components/driver/include/esp_private/gdma.h +++ b/components/driver/include/esp_private/gdma.h @@ -59,10 +59,23 @@ typedef struct { gdma_channel_handle_t sibling_chan; /*!< DMA sibling channel handle (NULL means having sibling is not necessary) */ gdma_channel_direction_t direction; /*!< DMA channel direction */ struct { - int reserve_sibling: 1; /*!< If set, DMA channel allocator would prefer to allocate new channel in a new pair, and reserve sibling channel for future use */ + int reserve_sibling: 1; /*!< If set, DMA channel allocator would prefer to allocate new channel in a new pair, and reserve sibling channel for future use */ } flags; } gdma_channel_alloc_config_t; +/** + * @brief GDMA transfer ability + * + * @note The alignment set in this structure is **not** a guarantee that gdma driver will take care of the nonalignment cases. + * Actually the GDMA driver has no knowledge about the DMA buffer (address and size) used by upper layer. + * So it's the responsibility of the **upper layer** to take care of the buffer address and size. + * + */ +typedef struct { + size_t sram_trans_align; /*!< DMA transfer alignment for memory in SRAM, in bytes. The driver enables/disables burst mode based on this value. 0 means no alignment is required */ + size_t psram_trans_align; /*!< DMA transfer alignment for memory in PSRAM, in bytes. The driver sets proper burst block size based on the alignment value. 0 means no alignment is required */ +} gdma_transfer_ability_t; + /** * @brief Type of GDMA event data * @@ -80,6 +93,9 @@ typedef struct { * @param event_data GDMA event data * @param user_data User registered data from `gdma_register_tx_event_callbacks` or `gdma_register_rx_event_callbacks` * + * @return Whether a task switch is needed after the callback function returns, + * this is usually due to the callback wakes up some high priority task. 
+ * */ typedef bool (*gdma_event_callback_t)(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data); @@ -172,6 +188,18 @@ esp_err_t gdma_connect(gdma_channel_handle_t dma_chan, gdma_trigger_t trig_perip */ esp_err_t gdma_disconnect(gdma_channel_handle_t dma_chan); +/** + * @brief Set DMA channel transfer ability + * + * @param[in] dma_chan GDMA channel handle, allocated by `gdma_new_channel` + * @param[in] ability Transfer ability, e.g. alignment + * @return + * - ESP_OK: Set DMA channel transfer ability successfully + * - ESP_ERR_INVALID_ARG: Set DMA channel transfer ability failed because of invalid argument + * - ESP_FAIL: Set DMA channel transfer ability failed because of other error + */ +esp_err_t gdma_set_transfer_ability(gdma_channel_handle_t dma_chan, const gdma_transfer_ability_t *ability); + /** * @brief Apply channel strategy for GDMA channel * diff --git a/components/esp_hw_support/esp_async_memcpy.c b/components/esp_hw_support/esp_async_memcpy.c index 326a5f4f78e..465bf821973 100644 --- a/components/esp_hw_support/esp_async_memcpy.c +++ b/components/esp_hw_support/esp_async_memcpy.c @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + +#include #include "freertos/FreeRTOS.h" #include "freertos/semphr.h" #include "hal/dma_types.h" @@ -22,6 +24,8 @@ static const char *TAG = "async_memcpy"; +#define ALIGN_DOWN(val, align) ((val) & ~((align) - 1)) + /** * @brief Type of async mcp stream * mcp stream inherits DMA descriptor, besides that, it has a callback function member @@ -43,7 +47,8 @@ typedef struct async_memcpy_context_t { dma_descriptor_t *tx_desc; // pointer to the next free TX descriptor dma_descriptor_t *rx_desc; // pointer to the next free RX descriptor dma_descriptor_t *next_rx_desc_to_check; // pointer to the next RX descriptor to recycle - uint32_t max_stream_num; // maximum number of streams + uint32_t max_stream_num; // maximum number of streams + size_t max_dma_buffer_size; // maximum DMA buffer size async_memcpy_stream_t *out_streams; // pointer to the first TX stream async_memcpy_stream_t *in_streams; // pointer to the first RX stream async_memcpy_stream_t streams_pool[0]; // stream pool (TX + RX), the size is configured during driver installation @@ -82,9 +87,14 @@ esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_me mcp_hdl->rx_desc = &mcp_hdl->in_streams[0].desc; mcp_hdl->next_rx_desc_to_check = &mcp_hdl->in_streams[0].desc; mcp_hdl->spinlock = (portMUX_TYPE)portMUX_INITIALIZER_UNLOCKED; + mcp_hdl->mcp_impl.sram_trans_align = config->sram_trans_align; + mcp_hdl->mcp_impl.psram_trans_align = config->psram_trans_align; + size_t trans_align = MAX(config->sram_trans_align, config->psram_trans_align); + mcp_hdl->max_dma_buffer_size = trans_align ? 
ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align) : DMA_DESCRIPTOR_BUFFER_MAX_SIZE; // initialize implementation layer - async_memcpy_impl_init(&mcp_hdl->mcp_impl); + ret = async_memcpy_impl_init(&mcp_hdl->mcp_impl); + ESP_GOTO_ON_ERROR(ret, err, TAG, "DMA M2M init failed"); *asmcp = mcp_hdl; @@ -121,14 +131,14 @@ static int async_memcpy_prepare_receive(async_memcpy_t asmcp, void *buffer, size dma_descriptor_t *start = desc; dma_descriptor_t *end = desc; - while (size > DMA_DESCRIPTOR_BUFFER_MAX_SIZE) { + while (size > asmcp->max_dma_buffer_size) { if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) { desc->dw0.suc_eof = 0; - desc->dw0.size = DMA_DESCRIPTOR_BUFFER_MAX_SIZE; + desc->dw0.size = asmcp->max_dma_buffer_size; desc->buffer = &buf[prepared_length]; desc = desc->next; // move to next descriptor - prepared_length += DMA_DESCRIPTOR_BUFFER_MAX_SIZE; - size -= DMA_DESCRIPTOR_BUFFER_MAX_SIZE; + prepared_length += asmcp->max_dma_buffer_size; + size -= asmcp->max_dma_buffer_size; } else { // out of RX descriptors goto _exit; @@ -162,15 +172,15 @@ static int async_memcpy_prepare_transmit(async_memcpy_t asmcp, void *buffer, siz dma_descriptor_t *start = desc; dma_descriptor_t *end = desc; - while (len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE) { + while (len > asmcp->max_dma_buffer_size) { if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) { desc->dw0.suc_eof = 0; // not the end of the transaction - desc->dw0.size = DMA_DESCRIPTOR_BUFFER_MAX_SIZE; - desc->dw0.length = DMA_DESCRIPTOR_BUFFER_MAX_SIZE; + desc->dw0.size = asmcp->max_dma_buffer_size; + desc->dw0.length = asmcp->max_dma_buffer_size; desc->buffer = &buf[prepared_length]; desc = desc->next; // move to next descriptor - prepared_length += DMA_DESCRIPTOR_BUFFER_MAX_SIZE; - len -= DMA_DESCRIPTOR_BUFFER_MAX_SIZE; + prepared_length += asmcp->max_dma_buffer_size; + len -= asmcp->max_dma_buffer_size; } else { // out of TX descriptors goto _exit; @@ -222,14 +232,20 @@ esp_err_t esp_async_memcpy(async_memcpy_t asmcp, void *dst, void *src, size_t n, size_t rx_prepared_size = 0; size_t tx_prepared_size = 0; ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "mcp handle can't be null"); - ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid"); - ESP_GOTO_ON_FALSE(n <= DMA_DESCRIPTOR_BUFFER_MAX_SIZE * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large"); + ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid: %p -> %p", src, dst); + ESP_GOTO_ON_FALSE(n <= asmcp->max_dma_buffer_size * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large"); + if (asmcp->mcp_impl.sram_trans_align) { + ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %d bytes", asmcp->mcp_impl.sram_trans_align); + } + if (asmcp->mcp_impl.psram_trans_align) { + ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.psram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %d bytes", asmcp->mcp_impl.psram_trans_align); + } // Prepare TX and RX descriptor portENTER_CRITICAL_SAFE(&asmcp->spinlock); rx_prepared_size = async_memcpy_prepare_receive(asmcp, dst, n, &rx_start_desc, &rx_end_desc); tx_prepared_size = async_memcpy_prepare_transmit(asmcp, src, n, &tx_start_desc, &tx_end_desc); - if ((rx_prepared_size == n) && (tx_prepared_size == n)) { + if 
(rx_start_desc && tx_start_desc && (rx_prepared_size == n) && (tx_prepared_size == n)) { // register user callback to the last descriptor async_memcpy_stream_t *mcp_stream = __containerof(rx_end_desc, async_memcpy_stream_t, desc); mcp_stream->cb = cb_isr; diff --git a/components/esp_hw_support/include/esp_async_memcpy.h b/components/esp_hw_support/include/esp_async_memcpy.h index 67194e44c6c..e95f9638e76 100644 --- a/components/esp_hw_support/include/esp_async_memcpy.h +++ b/components/esp_hw_support/include/esp_async_memcpy.h @@ -54,8 +54,10 @@ typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_t mcp_hdl, async_memcpy_event * */ typedef struct { - uint32_t backlog; /*!< Maximum number of streams that can be handled simultaneously */ - uint32_t flags; /*!< Extra flags to control async memcpy feature */ + uint32_t backlog; /*!< Maximum number of streams that can be handled simultaneously */ + size_t sram_trans_align; /*!< DMA transfer alignment (both in size and address) for SRAM memory */ + size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */ + uint32_t flags; /*!< Extra flags to control async memcpy feature */ } async_memcpy_config_t; /** @@ -63,9 +65,11 @@ typedef struct { * */ #define ASYNC_MEMCPY_DEFAULT_CONFIG() \ - { \ - .backlog = 8, \ - .flags = 0, \ + { \ + .backlog = 8, \ + .sram_trans_align = 0, \ + .psram_trans_align = 0, \ + .flags = 0, \ } /** diff --git a/components/esp_hw_support/port/async_memcpy_impl_gdma.c b/components/esp_hw_support/port/async_memcpy_impl_gdma.c index 0c3a2f599b5..7ecf7b68662 100644 --- a/components/esp_hw_support/port/async_memcpy_impl_gdma.c +++ b/components/esp_hw_support/port/async_memcpy_impl_gdma.c @@ -61,9 +61,21 @@ esp_err_t async_memcpy_impl_init(async_memcpy_impl_t *impl) gdma_strategy_config_t strategy_config = { .auto_update_desc = true, - .owner_check = true + .owner_check = true, }; + gdma_transfer_ability_t transfer_ability = { + .sram_trans_align = impl->sram_trans_align, + .psram_trans_align = impl->psram_trans_align, + }; + ret = gdma_set_transfer_ability(impl->tx_channel, &transfer_ability); + if (ret != ESP_OK) { + goto err; + } + ret = gdma_set_transfer_ability(impl->rx_channel, &transfer_ability); + if (ret != ESP_OK) { + goto err; + } gdma_apply_strategy(impl->tx_channel, &strategy_config); gdma_apply_strategy(impl->rx_channel, &strategy_config); @@ -108,5 +120,15 @@ esp_err_t async_memcpy_impl_restart(async_memcpy_impl_t *impl) bool async_memcpy_impl_is_buffer_address_valid(async_memcpy_impl_t *impl, void *src, void *dst) { - return true; + bool valid = true; + if (esp_ptr_external_ram(dst)) { + if (impl->psram_trans_align) { + valid = valid && (((intptr_t)dst & (impl->psram_trans_align - 1)) == 0); + } + } else { + if (impl->sram_trans_align) { + valid = valid && (((intptr_t)dst & (impl->sram_trans_align - 1)) == 0); + } + } + return valid; } diff --git a/components/esp_hw_support/port/include/esp_async_memcpy_impl.h b/components/esp_hw_support/port/include/esp_async_memcpy_impl.h index 80aac5c0fe4..83a382de752 100644 --- a/components/esp_hw_support/port/include/esp_async_memcpy_impl.h +++ b/components/esp_hw_support/port/include/esp_async_memcpy_impl.h @@ -46,6 +46,8 @@ typedef struct { gdma_channel_handle_t rx_channel; #endif intptr_t rx_eof_addr; + size_t sram_trans_align; + size_t psram_trans_align; bool isr_need_yield; // if current isr needs a yield for higher priority task } async_memcpy_impl_t; diff --git a/components/esp_hw_support/test/test_async_memcpy.c 
b/components/esp_hw_support/test/test_async_memcpy.c index 7c002e6796f..19c2251b332 100644 --- a/components/esp_hw_support/test/test_async_memcpy.c +++ b/components/esp_hw_support/test/test_async_memcpy.c @@ -12,37 +12,75 @@ #include "ccomp_timer.h" #include "esp_async_memcpy.h" #include "soc/soc_caps.h" +#include "hal/dma_types.h" #if SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED #define ALIGN_UP(addr, align) (((addr) + (align)-1) & ~((align)-1)) +#define ALIGN_DOWN(size, align) ((size) & ~((align) - 1)) -static void async_memcpy_setup_testbench(uint32_t seed, uint32_t *buffer_size, uint8_t **src_buf, uint8_t **dst_buf, uint8_t **from_addr, uint8_t **to_addr, uint32_t align) +typedef struct { + uint32_t seed; + uint32_t buffer_size; + uint8_t *src_buf; + uint8_t *dst_buf; + uint8_t *from_addr; + uint8_t *to_addr; + uint32_t align; + uint32_t offset; + bool src_in_psram; + bool dst_in_psram; +} memcpy_testbench_context_t; + +static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_context) { - srand(seed); + srand(test_context->seed); printf("allocating memory buffer...\r\n"); - // memory copy from/to PSRAM is not allowed - *src_buf = heap_caps_malloc(*buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - *dst_buf = heap_caps_calloc(1, *buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - - TEST_ASSERT_NOT_NULL_MESSAGE(*src_buf, "allocate source buffer failed"); - TEST_ASSERT_NOT_NULL_MESSAGE(*dst_buf, "allocate destination buffer failed"); - - *from_addr = (uint8_t *)ALIGN_UP((uint32_t)(*src_buf), 4); - *to_addr = (uint8_t *)ALIGN_UP((uint32_t)(*dst_buf), 4); - uint8_t gap = MAX(*from_addr - *src_buf, *to_addr - *dst_buf); - *buffer_size -= gap; - - *from_addr += align; - *to_addr += align; - *buffer_size -= align; - - printf("...size %d Bytes, src@%p, dst@%p\r\n", *buffer_size, *from_addr, *to_addr); + uint32_t buffer_size = test_context->buffer_size; + uint8_t *src_buf = NULL; + uint8_t *dst_buf = NULL; + uint8_t *from_addr = NULL; + uint8_t *to_addr = NULL; +#if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM + if (test_context->src_in_psram) { + src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_SPIRAM); + } else { + src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); + } + if (test_context->dst_in_psram) { + dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_SPIRAM); + } else { + dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); + } +#else + src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); + dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); +#endif + TEST_ASSERT_NOT_NULL_MESSAGE(src_buf, "allocate source buffer failed"); + TEST_ASSERT_NOT_NULL_MESSAGE(dst_buf, "allocate destination buffer failed"); + // address alignment + from_addr = (uint8_t *)ALIGN_UP((uint32_t)(src_buf), test_context->align); + to_addr = (uint8_t *)ALIGN_UP((uint32_t)(dst_buf), test_context->align); + uint8_t gap = MAX(from_addr - src_buf, to_addr - dst_buf); + buffer_size -= gap; + // size alignment + buffer_size = ALIGN_DOWN(buffer_size, test_context->align); + // adding extra offset + from_addr += test_context->offset; + to_addr += test_context->offset; + buffer_size -= test_context->offset; + printf("...size %d Bytes, src@%p, dst@%p\r\n", buffer_size, from_addr, to_addr); printf("fill src buffer with random data\r\n"); - for (int i = 0; i < *buffer_size; i++) { - 
(*from_addr)[i] = rand() % 256; + for (int i = 0; i < buffer_size; i++) { + from_addr[i] = rand() % 256; } + // return value + test_context->buffer_size = buffer_size; + test_context->src_buf = src_buf; + test_context->dst_buf = dst_buf; + test_context->from_addr = from_addr; + test_context->to_addr = to_addr; } static void async_memcpy_verify_and_clear_testbench(uint32_t seed, uint32_t buffer_size, uint8_t *src_buf, uint8_t *dst_buf, uint8_t *from_addr, uint8_t *to_addr) @@ -91,18 +129,18 @@ TEST_CASE("memory copy by DMA one by one", "[async mcp]") TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint32_t test_buffer_len[] = {256, 512, 1024, 2048, 4096, 5011}; - uint8_t *sbuf = NULL; - uint8_t *dbuf = NULL; - uint8_t *from = NULL; - uint8_t *to = NULL; + memcpy_testbench_context_t test_context = { + .align = 4, + }; for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { // Test different align edge - for (int align = 0; align < 4; align++) { - async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbuf, &dbuf, &from, &to, align); - TEST_ESP_OK(esp_async_memcpy(driver, to, from, test_buffer_len[i], NULL, NULL)); - async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbuf, dbuf, from, to); - + for (int off = 0; off < 4; off++) { + test_context.buffer_size = test_buffer_len[i]; + test_context.seed = i; + async_memcpy_setup_testbench(&test_context); + TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, NULL, NULL)); + async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr); vTaskDelay(pdMS_TO_TICKS(100)); } } @@ -117,86 +155,177 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]") TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint32_t test_buffer_len[] = {512, 1024, 2048, 4096, 5011}; - uint8_t *sbufs[] = {0, 0, 0, 0, 0}; - uint8_t *dbufs[] = {0, 0, 0, 0, 0}; - uint8_t *froms[] = {0, 0, 0, 0, 0}; - uint8_t *tos[] = {0, 0, 0, 0, 0}; + memcpy_testbench_context_t test_context[] = { + {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, + }; // Aligned case - for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) { - async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbufs[i], &dbufs[i], &froms[i], &tos[i], 0); + for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { + test_context[i].seed = i; + test_context[i].buffer_size = test_buffer_len[i]; + async_memcpy_setup_testbench(&test_context[i]); } for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { - TEST_ESP_OK(esp_async_memcpy(driver, tos[i], froms[i], test_buffer_len[i], NULL, NULL)); + TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL)); } - for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) { - async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbufs[i], dbufs[i], froms[i], tos[i]); + for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { + async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr); } // Non-aligned case - for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) { - async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbufs[i], &dbufs[i], &froms[i], &tos[i], 3); + for (int i = 
0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { + test_context[i].seed = i; + test_context[i].buffer_size = test_buffer_len[i]; + test_context[i].offset = 3; + async_memcpy_setup_testbench(&test_context[i]); } for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { - TEST_ESP_OK(esp_async_memcpy(driver, tos[i], froms[i], test_buffer_len[i], NULL, NULL)); + TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL)); } - for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) { - async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbufs[i], dbufs[i], froms[i], tos[i]); + for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { + async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr); } TEST_ESP_OK(esp_async_memcpy_uninstall(driver)); } -#define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16) -static uint32_t test_async_memcpy_bench_len = 4095; -static int count = 0; +#define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16) +static int s_count = 0; static IRAM_ATTR bool test_async_memcpy_isr_cb(async_memcpy_t mcp_hdl, async_memcpy_event_t *event, void *cb_args) { SemaphoreHandle_t sem = (SemaphoreHandle_t)cb_args; BaseType_t high_task_wakeup = pdFALSE; - count++; - if (count == TEST_ASYNC_MEMCPY_BENCH_COUNTS) { + s_count++; + if (s_count == TEST_ASYNC_MEMCPY_BENCH_COUNTS) { xSemaphoreGiveFromISR(sem, &high_task_wakeup); } return high_task_wakeup == pdTRUE; } -TEST_CASE("memory copy by DMA with callback", "[async mcp]") +static void memcpy_performance_test(uint32_t buffer_size) { SemaphoreHandle_t sem = xSemaphoreCreateBinary(); async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG(); - config.backlog = TEST_ASYNC_MEMCPY_BENCH_COUNTS; + config.backlog = (buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1) * TEST_ASYNC_MEMCPY_BENCH_COUNTS; + config.sram_trans_align = 4; // at least 4 bytes aligned for SRAM transfer + config.psram_trans_align = 64; // at least 64 bytes aligned for PSRAM transfer async_memcpy_t driver = NULL; + int64_t elapse_us = 0; + float throughput = 0.0; TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); - uint8_t *sbuf = NULL; - uint8_t *dbuf = NULL; - uint8_t *from = NULL; - uint8_t *to = NULL; - - async_memcpy_setup_testbench(0, &test_async_memcpy_bench_len, &sbuf, &dbuf, &from, &to, 0); - count = 0; + // 1. 
SRAM->SRAM + memcpy_testbench_context_t test_context = { + .align = config.psram_trans_align, + .buffer_size = buffer_size, + .src_in_psram = false, + .dst_in_psram = false, + }; + async_memcpy_setup_testbench(&test_context); + s_count = 0; ccomp_timer_start(); for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { - TEST_ESP_OK(esp_async_memcpy(driver, to, from, test_async_memcpy_bench_len, test_async_memcpy_isr_cb, sem)); + TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem)); } + // wait for done semaphore + TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000))); + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size); + ccomp_timer_start(); + for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { + memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size); + } + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size); + async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr); +#if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM + // 2. PSRAM->PSRAM + test_context.src_in_psram = true; + test_context.dst_in_psram = true; + async_memcpy_setup_testbench(&test_context); + s_count = 0; + ccomp_timer_start(); + for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { + TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem)); + } // wait for done semaphore TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000))); - esp_rom_printf("memcpy %d Bytes data by HW costs %lldus\r\n", test_async_memcpy_bench_len, ccomp_timer_stop() / TEST_ASYNC_MEMCPY_BENCH_COUNTS); + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size); + ccomp_timer_start(); + for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { + memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size); + } + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size); + async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr); + // 3. 
PSRAM->SRAM + test_context.src_in_psram = true; + test_context.dst_in_psram = false; + async_memcpy_setup_testbench(&test_context); + s_count = 0; ccomp_timer_start(); for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { - memcpy(to, from, test_async_memcpy_bench_len); + TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem)); } - esp_rom_printf("memcpy %d Bytes data by SW costs %lldus\r\n", test_async_memcpy_bench_len, ccomp_timer_stop() / TEST_ASYNC_MEMCPY_BENCH_COUNTS); + // wait for done semaphore + TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000))); + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size); + ccomp_timer_start(); + for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { + memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size); + } + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size); + async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr); - async_memcpy_verify_and_clear_testbench(0, test_async_memcpy_bench_len, sbuf, dbuf, from, to); + // 4. SRAM->PSRAM + test_context.src_in_psram = false; + test_context.dst_in_psram = true; + async_memcpy_setup_testbench(&test_context); + s_count = 0; + ccomp_timer_start(); + for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { + TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem)); + } + // wait for done semaphore + TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000))); + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size); + ccomp_timer_start(); + for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) { + memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size); + } + elapse_us = ccomp_timer_stop(); + throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us; + IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size); + async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr); +#endif TEST_ESP_OK(esp_async_memcpy_uninstall(driver)); vSemaphoreDelete(sem); } +TEST_CASE("memory copy performance test 40KB", "[async mcp]") +{ + memcpy_performance_test(40 * 1024); +} + +TEST_CASE("memory copy performance test 4KB", "[async mcp]") +{ + memcpy_performance_test(4 * 1024); +} + #endif //SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED diff --git a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c index 81287998123..ec35725fdfe 
100644
--- a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c
+++ b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c
@@ -37,7 +37,7 @@ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config
     esp_err_t ret;
     int time_waited_ms = 0;
 
-    while(1) {
+    while (1) {
         ret = gdma_new_channel(channel_config, channel);
 
         if (ret == ESP_OK) {
@@ -58,14 +58,12 @@ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config
 /* Initialize external memory specific DMA configs */
 static void esp_crypto_shared_dma_init_extmem(void)
 {
-    int tx_ch_id = 0;
-    int rx_ch_id = 0;
-
-    gdma_get_channel_id(tx_channel, &tx_ch_id);
-    gdma_get_channel_id(rx_channel, &rx_ch_id);
-
-    gdma_ll_tx_set_block_size_psram(&GDMA, tx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B);
-    gdma_ll_rx_set_block_size_psram(&GDMA, rx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B);
+    gdma_transfer_ability_t transfer_ability = {
+        .sram_trans_align = 4,
+        .psram_trans_align = 16,
+    };
+    gdma_set_transfer_ability(tx_channel, &transfer_ability);
+    gdma_set_transfer_ability(rx_channel, &transfer_ability);
 }
 #endif //SOC_GDMA_SUPPORT_PSRAM
 
@@ -137,7 +135,7 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou
         return ESP_ERR_INVALID_ARG;
     }
 
-    /* tx channel is reset by gdma_connect(), also reset rx to ensure a known state */
+    /* tx channel is reset by gdma_connect(), also reset rx to ensure a known state */
     gdma_get_channel_id(tx_channel, &rx_ch_id);
     gdma_ll_rx_reset_channel(&GDMA, rx_ch_id);
 
diff --git a/docs/en/api-reference/system/async_memcpy.rst b/docs/en/api-reference/system/async_memcpy.rst
index d71566a900b..37b2db3e0de 100644
--- a/docs/en/api-reference/system/async_memcpy.rst
+++ b/docs/en/api-reference/system/async_memcpy.rst
@@ -22,6 +22,8 @@ Configure and Install driver
 Driver configuration is described in :cpp:type:`async_memcpy_config_t`:
 
 :cpp:member:`backlog`: This is used to configured the maximum number of DMA operation that can be working at the background at the same time.
+:cpp:member:`sram_trans_align`: Declare the SRAM alignment for both data address and copy size; set it to zero if the data has no alignment restriction. If it is set to a multiple of four (i.e. 4X), the driver will enable burst mode internally, which is helpful for performance-sensitive applications.
+:cpp:member:`psram_trans_align`: Declare the PSRAM alignment for both data address and copy size. The user has to give it a valid value (only 16, 32 and 64 are supported) if the destination of the memcpy is located in PSRAM. The default alignment (i.e. 16) is applied if it is set to zero. Internally, the driver configures the block size used by DMA to access PSRAM according to this alignment.
 :cpp:member:`flags`: This is used to enable some special driver features.
 
 :c:macro:`ASYNC_MEMCPY_DEFAULT_CONFIG` provides a default configuration, which specifies the backlog to 8.
diff --git a/tools/unit-test-app/components/test_utils/include/test_utils.h b/tools/unit-test-app/components/test_utils/include/test_utils.h
index 56e73569018..029a0eeb349 100644
--- a/tools/unit-test-app/components/test_utils/include/test_utils.h
+++ b/tools/unit-test-app/components/test_utils/include/test_utils.h
@@ -73,7 +73,7 @@ extern "C" {
 
 /* @brief macro to print IDF performance
- * @param mode : performance item name. a string pointer.
+ * @param item : performance item name. a string pointer.
  * @param value_fmt: print format and unit of the value, for example: "%02fms", "%dKB"
  * @param value : the performance value.
*/
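
Note for reviewers: below is a minimal, hypothetical sketch of how a caller could consume the new gdma_set_transfer_ability() API introduced in PATCH 3/3. The function name example_setup_tx_channel and the choice of the AES trigger are illustrative assumptions; only the GDMA types and calls come from this series.

#include "esp_err.h"
#include "esp_private/gdma.h"

// Allocate a TX channel, declare its transfer alignment, then attach it to a peripheral.
static esp_err_t example_setup_tx_channel(gdma_channel_handle_t *out_chan)
{
    gdma_channel_alloc_config_t alloc_config = {
        .direction = GDMA_CHANNEL_DIRECTION_TX,
    };
    ESP_ERROR_CHECK(gdma_new_channel(&alloc_config, out_chan));

    // 4-byte alignment lets the driver enable burst mode for SRAM;
    // 16-byte alignment selects the matching block size for PSRAM access.
    gdma_transfer_ability_t ability = {
        .sram_trans_align = 4,
        .psram_trans_align = 16,
    };
    ESP_ERROR_CHECK(gdma_set_transfer_ability(*out_chan, &ability));

    return gdma_connect(*out_chan, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_AES, 0));
}

This mirrors the call order used by esp_crypto_shared_gdma.c in this series: allocate the channel, set its transfer ability, then connect it to the peripheral.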
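
Similarly, a sketch of the user-facing side: installing async_memcpy with the new sram_trans_align/psram_trans_align fields and copying into PSRAM. The buffer size, the use of heap_caps_aligned_alloc(), and the completion handling are assumptions for illustration; the configuration fields and alignment constraints are the ones documented in async_memcpy.rst above.

#include <stddef.h>
#include <stdint.h>
#include "esp_err.h"
#include "esp_heap_caps.h"
#include "esp_async_memcpy.h"

void example_async_copy_to_psram(void)
{
    async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
    config.sram_trans_align = 4;   // burst-friendly alignment for internal RAM
    config.psram_trans_align = 64; // destination buffer lives in PSRAM

    async_memcpy_t driver = NULL;
    ESP_ERROR_CHECK(esp_async_memcpy_install(&config, &driver));

    // Size and addresses must respect both alignments (4096 is a multiple of 4 and 64).
    const size_t size = 4096;
    uint8_t *src = heap_caps_aligned_alloc(4, size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
    uint8_t *dst = heap_caps_aligned_alloc(64, size, MALLOC_CAP_SPIRAM);

    // Queue the copy; pass an async_memcpy_isr_cb_t instead of NULL to be notified from ISR context.
    ESP_ERROR_CHECK(esp_async_memcpy(driver, dst, src, size, NULL, NULL));

    // ... wait for the copy to complete before reading dst or freeing the buffers ...

    ESP_ERROR_CHECK(esp_async_memcpy_uninstall(driver));
}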