test_async_memcpy.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
/*
 * SPDX-FileCopyrightText: 2021 Espressif Systems (Shanghai) CO LTD
 *
 * SPDX-License-Identifier: Apache-2.0
 */
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <sys/param.h>
  10. #include "esp_heap_caps.h"
  11. #include "esp_rom_sys.h"
  12. #include "freertos/FreeRTOS.h"
  13. #include "freertos/task.h"
  14. #include "freertos/semphr.h"
  15. #include "unity.h"
  16. #include "test_utils.h"
  17. #include "ccomp_timer.h"
  18. #include "esp_async_memcpy.h"
  19. #include "soc/soc_caps.h"
  20. #include "hal/dma_types.h"
  21. #if SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED
  22. #define ALIGN_UP(addr, align) (((addr) + (align)-1) & ~((align)-1))
  23. #define ALIGN_DOWN(size, align) ((size) & ~((align) - 1))
  24. typedef struct {
  25. uint32_t seed;
  26. uint32_t buffer_size;
  27. uint8_t *src_buf;
  28. uint8_t *dst_buf;
  29. uint8_t *from_addr;
  30. uint8_t *to_addr;
  31. uint32_t align;
  32. uint32_t offset;
  33. bool src_in_psram;
  34. bool dst_in_psram;
  35. } memcpy_testbench_context_t;
  36. static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_context)
  37. {
  38. srand(test_context->seed);
  39. printf("allocating memory buffer...\r\n");
  40. uint32_t buffer_size = test_context->buffer_size;
  41. uint8_t *src_buf = NULL;
  42. uint8_t *dst_buf = NULL;
  43. uint8_t *from_addr = NULL;
  44. uint8_t *to_addr = NULL;
  45. #if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM
  46. if (test_context->src_in_psram) {
  47. src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_SPIRAM);
  48. } else {
  49. src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
  50. }
  51. if (test_context->dst_in_psram) {
  52. dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_SPIRAM);
  53. } else {
  54. dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
  55. }
  56. #else
  57. src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
  58. dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
  59. #endif
  60. TEST_ASSERT_NOT_NULL_MESSAGE(src_buf, "allocate source buffer failed");
  61. TEST_ASSERT_NOT_NULL_MESSAGE(dst_buf, "allocate destination buffer failed");
  62. // address alignment
  63. from_addr = (uint8_t *)ALIGN_UP((uint32_t)(src_buf), test_context->align);
  64. to_addr = (uint8_t *)ALIGN_UP((uint32_t)(dst_buf), test_context->align);
  65. uint8_t gap = MAX(from_addr - src_buf, to_addr - dst_buf);
  66. buffer_size -= gap;
  67. // size alignment
  68. buffer_size = ALIGN_DOWN(buffer_size, test_context->align);
  69. // adding extra offset
  70. from_addr += test_context->offset;
  71. to_addr += test_context->offset;
  72. buffer_size -= test_context->offset;
  73. printf("...size %d Bytes, src@%p, dst@%p\r\n", buffer_size, from_addr, to_addr);
  74. printf("fill src buffer with random data\r\n");
  75. for (int i = 0; i < buffer_size; i++) {
  76. from_addr[i] = rand() % 256;
  77. }
  78. // return value
  79. test_context->buffer_size = buffer_size;
  80. test_context->src_buf = src_buf;
  81. test_context->dst_buf = dst_buf;
  82. test_context->from_addr = from_addr;
  83. test_context->to_addr = to_addr;
  84. }
  85. static void async_memcpy_verify_and_clear_testbench(uint32_t seed, uint32_t buffer_size, uint8_t *src_buf, uint8_t *dst_buf, uint8_t *from_addr, uint8_t *to_addr)
  86. {
  87. srand(seed);
  88. for (int i = 0; i < buffer_size; i++) {
  89. // check if source date has been copied to destination and source data not broken
  90. TEST_ASSERT_EQUAL_MESSAGE(rand() % 256, to_addr[i], "destination data doesn't match generator data");
  91. }
  92. srand(seed);
  93. for (int i = 0; i < buffer_size; i++) {
  94. // check if source data has been copied to destination
  95. TEST_ASSERT_EQUAL_MESSAGE(rand() % 256, to_addr[i], "destination data doesn't match source data");
  96. }
  97. free(src_buf);
  98. free(dst_buf);
  99. }
  100. TEST_CASE("memory copy the same buffer with different content", "[async mcp]")
  101. {
  102. async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
  103. config.backlog = 1;
  104. async_memcpy_t driver = NULL;
  105. TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
  106. uint8_t sbuf[256] = {0};
  107. uint8_t dbuf[256] = {0};
  108. for (int j = 0; j < 20; j++) {
  109. TEST_ESP_OK(esp_async_memcpy(driver, dbuf, sbuf, 256, NULL, NULL));
  110. for (int i = 0; i < 256; i++) {
  111. if (sbuf[i] != dbuf[i]) {
  112. printf("location[%d]:s=%d,d=%d\r\n", i, sbuf[i], dbuf[i]);
  113. TEST_FAIL_MESSAGE("destination data doesn't match source data");
  114. } else {
  115. sbuf[i] += 1;
  116. }
  117. }
  118. }
  119. TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
  120. }
  121. TEST_CASE("memory copy by DMA one by one", "[async mcp]")
  122. {
  123. async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
  124. config.backlog = 4;
  125. async_memcpy_t driver = NULL;
  126. TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
  127. uint32_t test_buffer_len[] = {256, 512, 1024, 2048, 4096, 5011};
  128. memcpy_testbench_context_t test_context = {
  129. .align = 4,
  130. };
  131. for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
  132. // Test different align edge
  133. for (int off = 0; off < 4; off++) {
  134. test_context.buffer_size = test_buffer_len[i];
  135. test_context.seed = i;
  136. async_memcpy_setup_testbench(&test_context);
  137. TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, NULL, NULL));
  138. async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
  139. vTaskDelay(pdMS_TO_TICKS(100));
  140. }
  141. }
  142. TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
  143. }
  144. TEST_CASE("memory copy by DMA on the fly", "[async mcp]")
  145. {
  146. async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
  147. async_memcpy_t driver = NULL;
  148. TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
  149. uint32_t test_buffer_len[] = {512, 1024, 2048, 4096, 5011};
  150. memcpy_testbench_context_t test_context[] = {
  151. {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4},
  152. };
  153. // Aligned case
  154. for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
  155. test_context[i].seed = i;
  156. test_context[i].buffer_size = test_buffer_len[i];
  157. async_memcpy_setup_testbench(&test_context[i]);
  158. }
  159. for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
  160. TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
  161. }
  162. for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
  163. async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
  164. }
  165. // Non-aligned case
  166. for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
  167. test_context[i].seed = i;
  168. test_context[i].buffer_size = test_buffer_len[i];
  169. test_context[i].offset = 3;
  170. async_memcpy_setup_testbench(&test_context[i]);
  171. }
  172. for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
  173. TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
  174. }
  175. for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
  176. async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
  177. }
  178. TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
  179. }
  180. #define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16)
  181. static int s_count = 0;
  182. static IRAM_ATTR bool test_async_memcpy_isr_cb(async_memcpy_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
  183. {
  184. SemaphoreHandle_t sem = (SemaphoreHandle_t)cb_args;
  185. BaseType_t high_task_wakeup = pdFALSE;
  186. s_count++;
  187. if (s_count == TEST_ASYNC_MEMCPY_BENCH_COUNTS) {
  188. xSemaphoreGiveFromISR(sem, &high_task_wakeup);
  189. }
  190. return high_task_wakeup == pdTRUE;
  191. }
  192. static void memcpy_performance_test(uint32_t buffer_size)
  193. {
  194. SemaphoreHandle_t sem = xSemaphoreCreateBinary();
  195. async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
  196. config.backlog = (buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1) * TEST_ASYNC_MEMCPY_BENCH_COUNTS;
  197. config.sram_trans_align = 4; // at least 4 bytes aligned for SRAM transfer
  198. config.psram_trans_align = 64; // at least 64 bytes aligned for PSRAM transfer
  199. async_memcpy_t driver = NULL;
  200. int64_t elapse_us = 0;
  201. float throughput = 0.0;
  202. TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
  203. // 1. SRAM->SRAM
  204. memcpy_testbench_context_t test_context = {
  205. .align = config.psram_trans_align,
  206. .buffer_size = buffer_size,
  207. .src_in_psram = false,
  208. .dst_in_psram = false,
  209. };
  210. async_memcpy_setup_testbench(&test_context);
  211. s_count = 0;
  212. ccomp_timer_start();
  213. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  214. TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
  215. }
  216. // wait for done semaphore
  217. TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
  218. elapse_us = ccomp_timer_stop();
  219. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  220. IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  221. ccomp_timer_start();
  222. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  223. memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
  224. }
  225. elapse_us = ccomp_timer_stop();
  226. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  227. IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  228. async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
  229. #if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM
  230. // 2. PSRAM->PSRAM
  231. test_context.src_in_psram = true;
  232. test_context.dst_in_psram = true;
  233. async_memcpy_setup_testbench(&test_context);
  234. s_count = 0;
  235. ccomp_timer_start();
  236. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  237. TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
  238. }
  239. // wait for done semaphore
  240. TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
  241. elapse_us = ccomp_timer_stop();
  242. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  243. IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  244. ccomp_timer_start();
  245. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  246. memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
  247. }
  248. elapse_us = ccomp_timer_stop();
  249. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  250. IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  251. async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
  252. // 3. PSRAM->SRAM
  253. test_context.src_in_psram = true;
  254. test_context.dst_in_psram = false;
  255. async_memcpy_setup_testbench(&test_context);
  256. s_count = 0;
  257. ccomp_timer_start();
  258. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  259. TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
  260. }
  261. // wait for done semaphore
  262. TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
  263. elapse_us = ccomp_timer_stop();
  264. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  265. IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  266. ccomp_timer_start();
  267. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  268. memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
  269. }
  270. elapse_us = ccomp_timer_stop();
  271. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  272. IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  273. async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
  274. // 4. SRAM->PSRAM
  275. test_context.src_in_psram = false;
  276. test_context.dst_in_psram = true;
  277. async_memcpy_setup_testbench(&test_context);
  278. s_count = 0;
  279. ccomp_timer_start();
  280. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  281. TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
  282. }
  283. // wait for done semaphore
  284. TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
  285. elapse_us = ccomp_timer_stop();
  286. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  287. IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  288. ccomp_timer_start();
  289. for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
  290. memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
  291. }
  292. elapse_us = ccomp_timer_stop();
  293. throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
  294. IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
  295. async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
  296. #endif
  297. TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
  298. vSemaphoreDelete(sem);
  299. }
  300. TEST_CASE("memory copy performance test 40KB", "[async mcp]")
  301. {
  302. memcpy_performance_test(40 * 1024);
  303. }
  304. TEST_CASE("memory copy performance test 4KB", "[async mcp]")
  305. {
  306. memcpy_performance_test(4 * 1024);
  307. }
  308. #endif //SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED