esp_himem.c

/*
 * SPDX-FileCopyrightText: 2018-2023 Espressif Systems (Shanghai) CO LTD
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "esp_psram.h"
#include "esp_private/esp_psram_extram.h"
#include "esp32/rom/cache.h"
#include "sdkconfig.h"
#include "esp32/himem.h"
#include "soc/soc.h"
#include "esp_log.h"
#include "esp_check.h"
#include "../esp_psram_impl.h"

/*
So, why does the API look this way, and why is it so inflexible that it does not allow any mappings other than full
32K chunks? Most of it has to do with the fact that the cache works on *virtual* addresses. What this comes down to
is that while it is allowed to map a range of physical memory into the address space twice, there is no cache
consistency between the two regions.
This means that a write to region A may or may not show up, perhaps delayed, in region B, as it depends on the time
the writeback to SPI RAM is done on A and the time before the corresponding cache line is invalidated on B. Note that
this goes for every 32-byte cache line: this implies that if a program writes to addresses X and Y within A, the
write to Y may show up before the write to X does.
It gets even worse when both A and B are written: theoretically, a write to a 32-byte cache line in A can be entirely
undone because of a write to a different address in B that happens to be in the same 32-byte cache line.
For these reasons, we do not allow double mappings at all. This, however, has other implications that make
supporting arbitrary ranges not really useful. Because of the lack of double mappings, applications will need to do
their own management of mapped regions, meaning they will normally map blocks in and out one at a time anyway, as
mapping more flexibly sized regions would carry the risk of accidentally mapping two overlapping regions. As this is
the case, to keep the code simple, at the moment we just force these blocks to be equal to the 32K MMU page size. The
API itself does allow for more granular allocations, so if there's a pressing need for a more complex solution in the
future, we can do this.
Note: In the future, we can expand on this API to do a memcpy() between SPI RAM and (internal) memory using the SPI1
peripheral. This needs support for SPI1 to be in the SPI driver, however.
*/
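
/*
Illustrative usage of the himem API implemented below (a minimal sketch, not part of this driver: error handling is
reduced to ESP_ERROR_CHECK(), and the block counts, the 0xAA fill pattern and the himem_example() name are assumptions
made up for this example):

    #include <string.h>
    #include "esp_err.h"
    #include "esp32/himem.h"

    void himem_example(void)
    {
        const size_t blk = 32 * 1024;   // one himem block == one 32K MMU page
        esp_himem_handle_t mem;
        esp_himem_rangehandle_t range;
        void *ptr;

        ESP_ERROR_CHECK(esp_himem_alloc(4 * blk, &mem));          // 128K of physical himem
        ESP_ERROR_CHECK(esp_himem_alloc_map_range(blk, &range));  // one 32K window of address space
        // Map the third physical block into the window, use it, then unmap it again.
        ESP_ERROR_CHECK(esp_himem_map(mem, range, 2 * blk, 0, blk, 0, &ptr));
        memset(ptr, 0xAA, blk);
        ESP_ERROR_CHECK(esp_himem_unmap(range, ptr, blk));
        ESP_ERROR_CHECK(esp_himem_free_map_range(range));
        ESP_ERROR_CHECK(esp_himem_free(mem));
    }
*/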

#if CONFIG_SPIRAM_BANKSWITCH_ENABLE
#define SPIRAM_BANKSWITCH_RESERVE CONFIG_SPIRAM_BANKSWITCH_RESERVE
#else
#define SPIRAM_BANKSWITCH_RESERVE 0
#endif

#define CACHE_BLOCKSIZE (32*1024)

//Start of the virtual address range reserved for himem use
#define VIRT_HIMEM_RANGE_START (SOC_EXTRAM_DATA_LOW+(128-SPIRAM_BANKSWITCH_RESERVE)*CACHE_BLOCKSIZE)
//Start MMU block reserved for himem use
#define VIRT_HIMEM_RANGE_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
//Start physical block
#define PHYS_HIMEM_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)

#define TAG "esp_himem"

//Metadata for a block of physical RAM
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
} ramblock_t;

//Metadata for a 32K memory address range
typedef struct {
    unsigned int is_alloced: 1;
    unsigned int is_mapped: 1;
    unsigned int ram_block: 16;
} rangeblock_t;

static ramblock_t *s_ram_descriptor = NULL;
static rangeblock_t *s_range_descriptor = NULL;
static int s_ramblockcnt = 0;
static const int s_rangeblockcnt = SPIRAM_BANKSWITCH_RESERVE;

//Handle for a window of address space
typedef struct esp_himem_rangedata_t {
    int block_ct;
    int block_start;
} esp_himem_rangedata_t;

//Handle for a range of physical memory
typedef struct esp_himem_ramdata_t {
    int block_ct;
    uint16_t *block;
} esp_himem_ramdata_t;

static portMUX_TYPE spinlock = portMUX_INITIALIZER_UNLOCKED;

static inline int ramblock_idx_valid(int ramblock_idx)
{
    return (ramblock_idx >= 0 && ramblock_idx < s_ramblockcnt);
}

static inline int rangeblock_idx_valid(int rangeblock_idx)
{
    return (rangeblock_idx >= 0 && rangeblock_idx < s_rangeblockcnt);
}

static void set_bank(int virt_bank, int phys_bank, int ct)
{
    int r __attribute__((unused));
    r = cache_sram_mmu_set(0, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct);
    assert(r == 0);
    r = cache_sram_mmu_set(1, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct);
    assert(r == 0);
}

size_t esp_himem_get_phys_size(void)
{
    int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
    uint32_t psram_available_size = 0;
    esp_psram_impl_get_available_size(&psram_available_size);
    return psram_available_size - paddr_start;
}

size_t esp_himem_get_free_size(void)
{
    size_t ret = 0;
    for (int i = 0; i < s_ramblockcnt; i++) {
        if (!s_ram_descriptor[i].is_alloced) {
            ret += CACHE_BLOCKSIZE;
        }
    }
    return ret;
}

size_t esp_himem_reserved_area_size(void)
{
    return CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE;
}

void __attribute__((constructor)) esp_himem_init(void)
{
    if (SPIRAM_BANKSWITCH_RESERVE == 0) {
        return;
    }
    uint32_t maxram = 0;
    esp_psram_impl_get_available_size(&maxram);
    //catch double init
    ESP_RETURN_ON_FALSE(s_ram_descriptor == NULL,, TAG, "already initialized"); //Looks weird; the (empty) error-code argument makes this expand to a plain 'return ;'
    ESP_RETURN_ON_FALSE(s_range_descriptor == NULL,, TAG, "already initialized");
    //need to have some reserved banks
    ESP_RETURN_ON_FALSE(SPIRAM_BANKSWITCH_RESERVE != 0,, TAG, "No banks reserved for himem");
    //Start and end of physical reserved memory. Note it starts slightly under
    //the 4MiB mark as the reserved banks can't have a unity mapping to be used by malloc
    //anymore; we treat them as himem instead.
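    //Worked example with assumed values (for illustration only): with SPIRAM_BANKSWITCH_RESERVE == 8 and 8 MiB of
    //physical PSRAM reported by esp_psram_impl_get_available_size(), paddr_start = 4096*1024 - 8*32768 = 0x3C0000,
    //so the top 256 KiB of the unity-mapped 4 MiB become himem and s_ramblockcnt = (0x800000 - 0x3C0000) / 0x8000 = 136.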
    int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
    int paddr_end = maxram;
    s_ramblockcnt = ((paddr_end - paddr_start) / CACHE_BLOCKSIZE);
    //Allocate data structures
    s_ram_descriptor = calloc(sizeof(ramblock_t), s_ramblockcnt);
    s_range_descriptor = calloc(sizeof(rangeblock_t), SPIRAM_BANKSWITCH_RESERVE);
    if (s_ram_descriptor == NULL || s_range_descriptor == NULL) {
        ESP_EARLY_LOGE(TAG, "Cannot allocate memory for meta info. Not initializing!");
        free(s_ram_descriptor);
        free(s_range_descriptor);
        return;
    }
    ESP_EARLY_LOGI(TAG, "Initialized. Using last %d 32KB address blocks for bank switching on %d KB of physical memory.",
                   SPIRAM_BANKSWITCH_RESERVE, (paddr_end - paddr_start) / 1024);
}

//Allocate `count` not-necessarily-consecutive physical RAM blocks; return their numbers in blocks_out[]. Returns
//true if the blocks could be allocated, false if not.
static bool allocate_blocks(int count, uint16_t *blocks_out)
{
    int n = 0;
    for (int i = 0; i < s_ramblockcnt && n != count; i++) {
        if (!s_ram_descriptor[i].is_alloced) {
            blocks_out[n] = i;
            n++;
        }
    }
    if (n == count) {
        //All blocks could be allocated. Mark as in use.
        for (int i = 0; i < count; i++) {
            s_ram_descriptor[blocks_out[i]].is_alloced = true;
            assert(s_ram_descriptor[blocks_out[i]].is_mapped == false);
        }
        return true;
    } else {
        //Error allocating blocks
        return false;
    }
}

esp_err_t esp_himem_alloc(size_t size, esp_himem_handle_t *handle_out)
{
    if (size % CACHE_BLOCKSIZE != 0) {
        return ESP_ERR_INVALID_SIZE;
    }
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_ramdata_t *r = calloc(sizeof(esp_himem_ramdata_t), 1);
    if (!r) {
        goto nomem;
    }
    r->block = calloc(sizeof(uint16_t), blocks);
    if (!r->block) {
        goto nomem;
    }
    portENTER_CRITICAL(&spinlock);
    int ok = allocate_blocks(blocks, r->block);
    portEXIT_CRITICAL(&spinlock);
    if (!ok) {
        goto nomem;
    }
    r->block_ct = blocks;
    *handle_out = r;
    return ESP_OK;
nomem:
    if (r) {
        free(r->block);
    }
    free(r);
    return ESP_ERR_NO_MEM;
}

esp_err_t esp_himem_free(esp_himem_handle_t handle)
{
    //Check if any of the blocks is still mapped; fail if this is the case.
    for (int i = 0; i < handle->block_ct; i++) {
        assert(ramblock_idx_valid(handle->block[i]));
        ESP_RETURN_ON_FALSE(!s_ram_descriptor[handle->block[i]].is_mapped, ESP_ERR_INVALID_ARG, TAG, "block in range still mapped");
    }
    //Mark blocks as free
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_ram_descriptor[handle->block[i]].is_alloced = false;
    }
    portEXIT_CRITICAL(&spinlock);
    //Free handle
    free(handle->block);
    free(handle);
    return ESP_OK;
}

esp_err_t esp_himem_alloc_map_range(size_t size, esp_himem_rangehandle_t *handle_out)
{
    ESP_RETURN_ON_FALSE(s_ram_descriptor != NULL, ESP_ERR_INVALID_STATE, TAG, "Himem not available!");
    ESP_RETURN_ON_FALSE(size % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_SIZE, TAG, "requested size not aligned to blocksize");
    int blocks = size / CACHE_BLOCKSIZE;
    esp_himem_rangedata_t *r = calloc(sizeof(esp_himem_rangedata_t), 1);
    if (!r) {
        return ESP_ERR_NO_MEM;
    }
    r->block_ct = blocks;
    r->block_start = -1;
    int start_free = 0;
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < s_rangeblockcnt; i++) {
        if (s_range_descriptor[i].is_alloced) {
            start_free = i + 1; //optimistically assume next block is free...
        } else if (i - start_free == blocks - 1) {
            //We found a span of blocks that's big enough to allocate the requested range in.
            r->block_start = start_free;
            break;
        }
    }
    if (r->block_start == -1) {
        //Couldn't find enough free blocks
        free(r);
        portEXIT_CRITICAL(&spinlock);
        return ESP_ERR_NO_MEM;
    }
    //Range is found. Mark the blocks as in use.
    for (int i = 0; i < blocks; i++) {
        s_range_descriptor[r->block_start + i].is_alloced = 1;
    }
    portEXIT_CRITICAL(&spinlock);
    //All done.
    *handle_out = r;
    return ESP_OK;
}

esp_err_t esp_himem_free_map_range(esp_himem_rangehandle_t handle)
{
    //Check if any of the blocks in the range have a mapping
    for (int i = 0; i < handle->block_ct; i++) {
        assert(rangeblock_idx_valid(handle->block_start + i));
        assert(s_range_descriptor[i + handle->block_start].is_alloced == 1); //should be, if handle is valid
        ESP_RETURN_ON_FALSE(!s_range_descriptor[i + handle->block_start].is_mapped, ESP_ERR_INVALID_ARG, TAG, "memory still mapped to range");
    }
    //We should be good to free this. Mark blocks as free.
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < handle->block_ct; i++) {
        s_range_descriptor[i + handle->block_start].is_alloced = 0;
    }
    portEXIT_CRITICAL(&spinlock);
    free(handle);
    return ESP_OK;
}

esp_err_t esp_himem_map(esp_himem_handle_t handle, esp_himem_rangehandle_t range, size_t ram_offset, size_t range_offset, size_t len, int flags, void **out_ptr)
{
    int ram_block = ram_offset / CACHE_BLOCKSIZE;
    int range_block = range_offset / CACHE_BLOCKSIZE;
    int blockcount = len / CACHE_BLOCKSIZE;
    ESP_RETURN_ON_FALSE(s_ram_descriptor != NULL, ESP_ERR_INVALID_STATE, TAG, "Himem not available!");
    //Offsets and length must be block-aligned
    ESP_RETURN_ON_FALSE(ram_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "ram offset not aligned to blocksize");
    ESP_RETURN_ON_FALSE(range_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "range not aligned to blocksize");
    ESP_RETURN_ON_FALSE(len % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "length not aligned to blocksize");
    //ram and range should be within allocated range
    ESP_RETURN_ON_FALSE(ram_block + blockcount <= handle->block_ct, ESP_ERR_INVALID_SIZE, TAG, "args not in range of phys ram handle");
    ESP_RETURN_ON_FALSE(range_block + blockcount <= range->block_ct, ESP_ERR_INVALID_SIZE, TAG, "args not in range of range handle");
    //Check if ram blocks aren't already mapped, and if memory range is unmapped
    for (int i = 0; i < blockcount; i++) {
        ESP_RETURN_ON_FALSE(!s_ram_descriptor[handle->block[i + ram_block]].is_mapped, ESP_ERR_INVALID_STATE, TAG, "ram already mapped");
        ESP_RETURN_ON_FALSE(!s_range_descriptor[range->block_start + i + range_block].is_mapped, ESP_ERR_INVALID_STATE, TAG, "range already mapped");
    }
    //Map and mark as mapped
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        assert(ramblock_idx_valid(handle->block[i + ram_block]));
        s_ram_descriptor[handle->block[i + ram_block]].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 1;
        s_range_descriptor[range->block_start + i + range_block].ram_block = handle->block[i + ram_block];
    }
    portEXIT_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        set_bank(VIRT_HIMEM_RANGE_BLOCKSTART + range->block_start + i + range_block, handle->block[i + ram_block] + PHYS_HIMEM_BLOCKSTART, 1);
    }
    //Set out pointer
    *out_ptr = (void *)(VIRT_HIMEM_RANGE_START + (range->block_start + range_block) * CACHE_BLOCKSIZE);
    return ESP_OK;
}

esp_err_t esp_himem_unmap(esp_himem_rangehandle_t range, void *ptr, size_t len)
{
    //Note: doesn't actually unmap, just clears cache and marks blocks as unmapped.
    //Future optimization: could actually lazy-unmap here: essentially, do nothing and only clear the cache when we re-use
    //the block for a different physical address.
    int range_offset = (uint32_t)ptr - VIRT_HIMEM_RANGE_START;
    int range_block = (range_offset / CACHE_BLOCKSIZE) - range->block_start;
    int blockcount = len / CACHE_BLOCKSIZE;
    ESP_RETURN_ON_FALSE(range_offset % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "range offset not block-aligned");
    ESP_RETURN_ON_FALSE(len % CACHE_BLOCKSIZE == 0, ESP_ERR_INVALID_ARG, TAG, "map length not block-aligned");
    ESP_RETURN_ON_FALSE(range_block + blockcount <= range->block_ct, ESP_ERR_INVALID_ARG, TAG, "range out of bounds for handle");
    portENTER_CRITICAL(&spinlock);
    for (int i = 0; i < blockcount; i++) {
        int ramblock = s_range_descriptor[range->block_start + i + range_block].ram_block;
        assert(ramblock_idx_valid(ramblock));
        s_ram_descriptor[ramblock].is_mapped = 0;
        s_range_descriptor[range->block_start + i + range_block].is_mapped = 0;
    }
    esp_psram_extram_writeback_cache();
    portEXIT_CRITICAL(&spinlock);
    return ESP_OK;
}