xt_asm_utils.h 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. /*
  2. * SPDX-FileCopyrightText: 2017, Intel Corporation
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * SPDX-FileContributor: 2016-2022 Espressif Systems (Shanghai) CO LTD
  7. */
  8. /* File adapted to use on IDF FreeRTOS component, extracted
  9. * originally from zephyr RTOS code base:
  10. * https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
  11. */
  12. #ifndef __XT_ASM_UTILS_H
  13. #define __XT_ASM_UTILS_H
  14. /*
  15. * SPILL_ALL_WINDOWS
  16. *
  17. * Spills all windowed registers (i.e. registers not visible as
  18. * A0-A15) to their ABI-defined spill regions on the stack.
  19. *
  20. * Unlike the Xtensa HAL implementation, this code requires that the
  21. * EXCM and WOE bit be enabled in PS, and relies on repeated hardware
  22. * exception handling to do the register spills. The trick is to do a
  23. * noop write to the high registers, which the hardware will trap
  24. * (into an overflow exception) in the case where those registers are
  25. * already used by an existing call frame. Then it rotates the window
  26. * and repeats until all but the A0-A3 registers of the original frame
  27. * are guaranteed to be spilled, eventually rotating back around into
  28. * the original frame. Advantages:
  29. *
  30. * - Vastly smaller code size
  31. *
  32. * - More easily maintained if changes are needed to window over/underflow
  33. * exception handling.
  34. *
  35. * - Requires no scratch registers to do its work, so can be used safely in any
  36. * context.
  37. *
  38. * - If the WOE bit is not enabled (for example, in code written for
  39. * the CALL0 ABI), this becomes a silent noop and operates compatibly.
  40. *
  41. * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
  42. * just a little bit, it's MUCH faster. With a mostly full register
  43. * file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
  44. * registers with this vs. 279 (!) to do it with
  45. * xthal_spill_windows().
  46. */
  .macro SPILL_ALL_WINDOWS
    /*
     * Each "and aN, aN, aN" leaves the register value unchanged; it exists
     * only to touch a high register so that the hardware raises a window
     * overflow exception if that register still belongs to an older frame.
     * The rotw amounts in each variant add up to XCHAL_NUM_AREGS / 4, so
     * the window ends up back on the frame it started from.
     */
#if XCHAL_NUM_AREGS == 64
    /* 4 x (touch a12, rotate by 3), then a final touch and rotate by 4: 16 total */
    .rept   4
    and     a12, a12, a12
    rotw    3
    .endr
    and     a12, a12, a12
    rotw    4
#elif XCHAL_NUM_AREGS == 32
    /* 2 x (touch a12, rotate by 3), then touch a4 and rotate by 2: 8 total */
    .rept   2
    and     a12, a12, a12
    rotw    3
    .endr
    /* Last step touches a4 instead of a12; presumably because only one
     * group of four registers is left before wrapping back onto the
     * starting frame. */
    and     a4, a4, a4
    rotw    2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
  .endm
  70. /*
  71. --------------------------------------------------------------------------------
  72. Macro spinlock_take
  73. This macro will repeatedly attempt to atomically set a spinlock variable
  74. using the s32c1i instruction. A spinlock is considered free if its value is 0.
  75. Entry:
  76. - "reg_A/B" as scratch registers
  77. - "lock_var" spinlock variable's symbol
  78. - Interrupts must already be disabled by caller
  79. Exit:
  80. - Spinlock set to current core's ID (PRID)
  81. - "reg_A/B" clobbered
  82. --------------------------------------------------------------------------------
  83. */
  84. #if portNUM_PROCESSORS > 1
  /*
   * spinlock_take reg_A reg_B lock_var
   *
   * Spin until the word at lock_var is atomically switched from 0 (free)
   * to the value read from PRID, using an s32c1i compare-and-set retry loop.
   *
   *   reg_A    - scratch; left holding the address of lock_var
   *   reg_B    - scratch; left holding 0 once the lock has been taken
   *   lock_var - symbol of the spinlock word
   *
   * The caller must already have interrupts disabled. reg_A and reg_B must
   * name different registers: reg_A keeps the address across iterations
   * while reg_B is rewritten on every pass.
   *
   * The retry label is suffixed with the assembler's per-expansion macro
   * counter, so the macro can be expanded any number of times within one
   * assembly unit without defining the same label twice.
   */
  .macro spinlock_take reg_A reg_B lock_var
    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
.L_spinlock_loop_\@:
    movi    \reg_B, 0                       /* Expected (free) value goes into SCOMPARE1 */
    wsr     \reg_B, SCOMPARE1
    rsync                                   /* SCOMPARE1 must be written before s32c1i executes */
    rsr     \reg_B, PRID                    /* Value to store: the current core's ID */
    s32c1i  \reg_B, \reg_A, 0               /* Attempt *lock_var = reg_B; reg_B receives the old value */
    bnez    \reg_B, .L_spinlock_loop_\@     /* Old value != 0 means the lock was held: retry */
  .endm
  95. #endif /* portNUM_PROCESSORS > 1 */
  96. /*
  97. --------------------------------------------------------------------------------
  98. Macro spinlock_release
  99. This macro will release a spinlock variable previously taken by the
  100. spinlock_take macro.
  101. Entry:
  102. - "reg_A/B" as scratch registers
  103. - "lock_var" spinlock variable's symbol
  104. - Interrupts must already be disabled by caller
  105. Exit:
  106. - "reg_A/B" clobbered
  107. --------------------------------------------------------------------------------
  108. */
  109. #if portNUM_PROCESSORS > 1
  /*
   * spinlock_release reg_A reg_B lock_var
   *
   * Mark the spinlock word at lock_var as free again by storing 0 to it.
   * Both scratch registers are overwritten: reg_A ends up holding the
   * address of lock_var and reg_B ends up holding 0.
   */
  .macro spinlock_release reg_A reg_B lock_var
    movi    \reg_B, 0                       /* reg_B = "free" value */
    movi    \reg_A, \lock_var               /* reg_A = &lock_var */
    s32i    \reg_B, \reg_A, 0               /* *lock_var = 0 */
  .endm
  115. #endif /* portNUM_PROCESSORS > 1 */
  116. #endif /* __XT_ASM_UTILS_H */