xt_asm_utils.h 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. /*
  2. * Copyright (c) 2017, Intel Corporation
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. /* Copyright 2015-2018 Espressif Systems (Shanghai) PTE LTD
  7. *
  8. * Licensed under the Apache License, Version 2.0 (the "License");
  9. * you may not use this file except in compliance with the License.
  10. * You may obtain a copy of the License at
  11. *
  12. * http://www.apache.org/licenses/LICENSE-2.0
  13. *
  14. * Unless required by applicable law or agreed to in writing, software
  15. * distributed under the License is distributed on an "AS IS" BASIS,
  16. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17. * See the License for the specific language governing permissions and
  18. * limitations under the License.
  19. */
  20. /* File adapted to use on IDF FreeRTOS component, extracted
  21. * originally from zephyr RTOS code base:
  22. * https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
  23. */
  24. #ifndef __XT_ASM_UTILS_H
  25. #define __XT_ASM_UTILS_H
  26. /*
  27. * SPILL_ALL_WINDOWS
  28. *
  29. * Spills all windowed registers (i.e. registers not visible as
  30. * A0-A15) to their ABI-defined spill regions on the stack.
  31. *
  32. * Unlike the Xtensa HAL implementation, this code requires that the
  33. * EXCM and WOE bit be enabled in PS, and relies on repeated hardware
  34. * exception handling to do the register spills. The trick is to do a
  35. * noop write to the high registers, which the hardware will trap
  36. * (into an overflow exception) in the case where those registers are
  37. * already used by an existing call frame. Then it rotates the window
  38. * and repeats until all but the A0-A3 registers of the original frame
  39. * are guaranteed to be spilled, eventually rotating back around into
  40. * the original frame. Advantages:
  41. *
  42. * - Vastly smaller code size
  43. *
  44. * - More easily maintained if changes are needed to window over/underflow
  45. * exception handling.
  46. *
  47. * - Requires no scratch registers to do its work, so can be used safely in any
  48. * context.
  49. *
  50. * - If the WOE bit is not enabled (for example, in code written for
  51. * the CALL0 ABI), this becomes a silent no-op and operates compatibly.
  52. *
  53. * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
  54. * just a little bit, it's MUCH faster. With a mostly full register
  55. * file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
  56. * registers with this vs. 279 (!) to do it with
  57. * xthal_spill_windows().
  58. */
  .macro SPILL_ALL_WINDOWS
  /*
   * Probe-and-rotate sequence.
   *
   * Every "and aN, aN, aN" writes a register back with its own value, so it
   * changes no data; its only purpose is to touch a high register so that
   * the hardware traps into an overflow exception when that register is
   * still in use by an existing call frame. The "rotw" that follows moves
   * the window on to the next set of registers to probe. The rotation
   * amounts are chosen so that the sequence ends back in the frame it
   * started from.
   */
  #if XCHAL_NUM_AREGS == 64
      /* 4 x (probe a12, rotate 3) + (probe a12, rotate 4): 16 steps total. */
      .rept 4
      and     a12, a12, a12
      rotw    3
      .endr
      and     a12, a12, a12
      rotw    4
  #elif XCHAL_NUM_AREGS == 32
      /* 2 x (probe a12, rotate 3) + (probe a4, rotate 2): 8 steps total. */
      .rept 2
      and     a12, a12, a12
      rotw    3
      .endr
      and     a4, a4, a4
      rotw    2
  #else
  #error Unrecognized XCHAL_NUM_AREGS
  #endif
  .endm
  82. #endif