/* (page-scrape artifacts removed: file-size banner and line-number digit runs) */
/*
 * xtensa/coreasm.h -- assembler-specific definitions that depend on CORE configuration
 *
 * Source for configuration-independent binaries (which link in a
 * configuration-specific HAL library) must NEVER include this file.
 * It is perfectly normal, however, for the HAL itself to include this file.
 *
 * This file must NOT include xtensa/config/system.h. Any assembler
 * header file that depends on system information should likely go
 * in a new systemasm.h (or sysasm.h) header file.
 *
 * NOTE: macro beqi32 is NOT configuration-dependent, and is placed
 * here until we have a proper configuration-independent header file.
 */
/* $Id: //depot/rel/Eaglenest/Xtensa/OS/include/xtensa/coreasm.h#3 $ */
/*
 * Copyright (c) 2000-2014 Tensilica Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
  38. #ifndef XTENSA_COREASM_H
  39. #define XTENSA_COREASM_H
  40. /*
  41. * Tell header files this is assembly source, so they can avoid non-assembler
  42. * definitions (eg. C types etc):
  43. */
  44. #ifndef _ASMLANGUAGE /* conditionalize to avoid cpp warnings (3rd parties might use same macro) */
  45. #define _ASMLANGUAGE
  46. #endif
  47. #include <xtensa/config/core.h>
  48. #include <xtensa/config/specreg.h>
  49. #include <xtensa/config/system.h>
  50. /*
  51. * Assembly-language specific definitions (assembly macros, etc.).
  52. */
  53. /*----------------------------------------------------------------------
  54. * find_ms_setbit
  55. *
  56. * This macro finds the most significant bit that is set in <as>
  57. * and return its index + <base> in <ad>, or <base> - 1 if <as> is zero.
  58. * The index counts starting at zero for the lsbit, so the return
  59. * value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
  60. *
  61. * Parameters:
  62. * <ad> destination address register (any register)
  63. * <as> source address register
  64. * <at> temporary address register (must be different than <as>)
  65. * <base> constant value added to result (usually 0 or 1)
  66. * On entry:
  67. * <ad> = undefined if different than <as>
  68. * <as> = value whose most significant set bit is to be found
  69. * <at> = undefined
  70. * no other registers are used by this macro.
  71. * On exit:
  72. * <ad> = <base> + index of msbit set in original <as>,
  73. * = <base> - 1 if original <as> was zero.
  74. * <as> clobbered (if not <ad>)
  75. * <at> clobbered (if not <ad>)
  76. * Example:
  77. * find_ms_setbit a0, a4, a0, 0 -- return in a0 index of msbit set in a4
  78. */
  79. .macro find_ms_setbit ad, as, at, base
  80. #if XCHAL_HAVE_NSA
  81. movi \at, 31+\base
  82. nsau \as, \as // get index of \as, numbered from msbit (32 if absent)
  83. sub \ad, \at, \as // get numbering from lsbit (0..31, -1 if absent)
  84. #else /* XCHAL_HAVE_NSA */
  85. movi \at, \base // start with result of 0 (point to lsbit of 32)
  86. beqz \as, 2f // special case for zero argument: return -1
  87. bltui \as, 0x10000, 1f // is it one of the 16 lsbits? (if so, check lower 16 bits)
  88. addi \at, \at, 16 // no, increment result to upper 16 bits (of 32)
  89. //srli \as, \as, 16 // check upper half (shift right 16 bits)
  90. extui \as, \as, 16, 16 // check upper half (shift right 16 bits)
  91. 1: bltui \as, 0x100, 1f // is it one of the 8 lsbits? (if so, check lower 8 bits)
  92. addi \at, \at, 8 // no, increment result to upper 8 bits (of 16)
  93. srli \as, \as, 8 // shift right to check upper 8 bits
  94. 1: bltui \as, 0x10, 1f // is it one of the 4 lsbits? (if so, check lower 4 bits)
  95. addi \at, \at, 4 // no, increment result to upper 4 bits (of 8)
  96. srli \as, \as, 4 // shift right 4 bits to check upper half
  97. 1: bltui \as, 0x4, 1f // is it one of the 2 lsbits? (if so, check lower 2 bits)
  98. addi \at, \at, 2 // no, increment result to upper 2 bits (of 4)
  99. srli \as, \as, 2 // shift right 2 bits to check upper half
  100. 1: bltui \as, 0x2, 1f // is it the lsbit?
  101. addi \at, \at, 2 // no, increment result to upper bit (of 2)
  102. 2: addi \at, \at, -1 // (from just above: add 1; from beqz: return -1)
  103. //srli \as, \as, 1
  104. 1: // done! \at contains index of msbit set (or -1 if none set)
  105. .if 0x\ad - 0x\at // destination different than \at ? (works because regs are a0-a15)
  106. mov \ad, \at // then move result to \ad
  107. .endif
  108. #endif /* XCHAL_HAVE_NSA */
  109. .endm // find_ms_setbit
  110. /*----------------------------------------------------------------------
  111. * find_ls_setbit
  112. *
  113. * This macro finds the least significant bit that is set in <as>,
  114. * and return its index in <ad>.
  115. * Usage is the same as for the find_ms_setbit macro.
  116. * Example:
  117. * find_ls_setbit a0, a4, a0, 0 -- return in a0 index of lsbit set in a4
  118. */
  119. .macro find_ls_setbit ad, as, at, base
  120. neg \at, \as // keep only the least-significant bit that is set...
  121. and \as, \at, \as // ... in \as
  122. find_ms_setbit \ad, \as, \at, \base
  123. .endm // find_ls_setbit
  124. /*----------------------------------------------------------------------
  125. * find_ls_one
  126. *
  127. * Same as find_ls_setbit with base zero.
  128. * Source (as) and destination (ad) registers must be different.
  129. * Provided for backward compatibility.
  130. */
  131. .macro find_ls_one ad, as
  132. find_ls_setbit \ad, \as, \ad, 0
  133. .endm // find_ls_one
  134. /*----------------------------------------------------------------------
  135. * floop, floopnez, floopgtz, floopend
  136. *
  137. * These macros are used for fast inner loops that
  138. * work whether or not the Loops options is configured.
  139. * If the Loops option is configured, they simply use
  140. * the zero-overhead LOOP instructions; otherwise
  141. * they use explicit decrement and branch instructions.
  142. *
  143. * They are used in pairs, with floop, floopnez or floopgtz
  144. * at the beginning of the loop, and floopend at the end.
  145. *
  146. * Each pair of loop macro calls must be given the loop count
  147. * address register and a unique label for that loop.
  148. *
  149. * Example:
  150. *
  151. * movi a3, 16 // loop 16 times
  152. * floop a3, myloop1
  153. * :
  154. * bnez a7, end1 // exit loop if a7 != 0
  155. * :
  156. * floopend a3, myloop1
  157. * end1:
  158. *
  159. * Like the LOOP instructions, these macros cannot be
  160. * nested, must include at least one instruction,
  161. * cannot call functions inside the loop, etc.
  162. * The loop can be exited by jumping to the instruction
  163. * following floopend (or elsewhere outside the loop),
  164. * or continued by jumping to a NOP instruction placed
  165. * immediately before floopend.
  166. *
  167. * Unlike LOOP instructions, the register passed to floop*
  168. * cannot be used inside the loop, because it is used as
  169. * the loop counter if the Loops option is not configured.
  170. * And its value is undefined after exiting the loop.
  171. * And because the loop counter register is active inside
  172. * the loop, you can't easily use this construct to loop
  173. * across a register file using ROTW as you might with LOOP
  174. * instructions, unless you copy the loop register along.
  175. */
  176. /* Named label version of the macros: */
  177. .macro floop ar, endlabel
  178. floop_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
  179. .endm
  180. .macro floopnez ar, endlabel
  181. floopnez_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
  182. .endm
  183. .macro floopgtz ar, endlabel
  184. floopgtz_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
  185. .endm
  186. .macro floopend ar, endlabel
  187. floopend_ \ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
  188. .endm
  189. /* Numbered local label version of the macros: */
  190. #if 0 /*UNTESTED*/
  191. .macro floop89 ar
  192. floop_ \ar, 8, 9f
  193. .endm
  194. .macro floopnez89 ar
  195. floopnez_ \ar, 8, 9f
  196. .endm
  197. .macro floopgtz89 ar
  198. floopgtz_ \ar, 8, 9f
  199. .endm
  200. .macro floopend89 ar
  201. floopend_ \ar, 8b, 9
  202. .endm
  203. #endif /*0*/
  204. /* Underlying version of the macros: */
  205. .macro floop_ ar, startlabel, endlabelref
  206. .ifdef _infloop_
  207. .if _infloop_
  208. .err // Error: floop cannot be nested
  209. .endif
  210. .endif
  211. .set _infloop_, 1
  212. #if XCHAL_HAVE_LOOPS
  213. loop \ar, \endlabelref
  214. #else /* XCHAL_HAVE_LOOPS */
  215. \startlabel:
  216. addi \ar, \ar, -1
  217. #endif /* XCHAL_HAVE_LOOPS */
  218. .endm // floop_
  219. .macro floopnez_ ar, startlabel, endlabelref
  220. .ifdef _infloop_
  221. .if _infloop_
  222. .err // Error: floopnez cannot be nested
  223. .endif
  224. .endif
  225. .set _infloop_, 1
  226. #if XCHAL_HAVE_LOOPS
  227. loopnez \ar, \endlabelref
  228. #else /* XCHAL_HAVE_LOOPS */
  229. beqz \ar, \endlabelref
  230. \startlabel:
  231. addi \ar, \ar, -1
  232. #endif /* XCHAL_HAVE_LOOPS */
  233. .endm // floopnez_
  234. .macro floopgtz_ ar, startlabel, endlabelref
  235. .ifdef _infloop_
  236. .if _infloop_
  237. .err // Error: floopgtz cannot be nested
  238. .endif
  239. .endif
  240. .set _infloop_, 1
  241. #if XCHAL_HAVE_LOOPS
  242. loopgtz \ar, \endlabelref
  243. #else /* XCHAL_HAVE_LOOPS */
  244. bltz \ar, \endlabelref
  245. beqz \ar, \endlabelref
  246. \startlabel:
  247. addi \ar, \ar, -1
  248. #endif /* XCHAL_HAVE_LOOPS */
  249. .endm // floopgtz_
  250. .macro floopend_ ar, startlabelref, endlabel
  251. .ifndef _infloop_
  252. .err // Error: floopend without matching floopXXX
  253. .endif
  254. .ifeq _infloop_
  255. .err // Error: floopend without matching floopXXX
  256. .endif
  257. .set _infloop_, 0
  258. #if ! XCHAL_HAVE_LOOPS
  259. bnez \ar, \startlabelref
  260. #endif /* XCHAL_HAVE_LOOPS */
  261. \endlabel:
  262. .endm // floopend_
  263. /*----------------------------------------------------------------------
  264. * crsil -- conditional RSIL (read/set interrupt level)
  265. *
  266. * Executes the RSIL instruction if it exists, else just reads PS.
  267. * The RSIL instruction does not exist in the new exception architecture
  268. * if the interrupt option is not selected.
  269. */
  270. .macro crsil ar, newlevel
  271. #if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS
  272. rsil \ar, \newlevel
  273. #else
  274. rsr \ar, PS
  275. #endif
  276. .endm // crsil
  277. /*----------------------------------------------------------------------
  278. * safe_movi_a0 -- move constant into a0 when L32R is not safe
  279. *
  280. * This macro is typically used by interrupt/exception handlers.
  281. * Loads a 32-bit constant in a0, without using any other register,
  282. * and without corrupting the LITBASE register, even when the
  283. * value of the LITBASE register is unknown (eg. when application
  284. * code and interrupt/exception handling code are built independently,
  285. * and thus with independent values of the LITBASE register;
  286. * debug monitors are one example of this).
  287. *
  288. * Worst-case size of resulting code: 17 bytes.
  289. */
  290. .macro safe_movi_a0 constant
  291. #if XCHAL_HAVE_ABSOLUTE_LITERALS
  292. /* Contort a PC-relative literal load even though we may be in litbase-relative mode: */
  293. j 1f
  294. .begin no-transform // ensure what follows is assembled exactly as-is
  295. .align 4 // ensure constant and call0 target ...
  296. .byte 0 // ... are 4-byte aligned (call0 instruction is 3 bytes long)
  297. 1: call0 2f // read PC (that follows call0) in a0
  298. .long \constant // 32-bit constant to load into a0
  299. 2:
  300. .end no-transform
  301. l32i a0, a0, 0 // load constant
  302. #else
  303. movi a0, \constant // no LITBASE, can assume PC-relative L32R
  304. #endif
  305. .endm
  306. /*----------------------------------------------------------------------
  307. * window_spill{4,8,12}
  308. *
  309. * These macros spill callers' register windows to the stack.
  310. * They work for both privileged and non-privileged tasks.
  311. * Must be called from a windowed ABI context, eg. within
  312. * a windowed ABI function (ie. valid stack frame, window
  313. * exceptions enabled, not in exception mode, etc).
  314. *
  315. * This macro requires a single invocation of the window_spill_common
  316. * macro in the same assembly unit and section.
  317. *
  318. * Note that using window_spill{4,8,12} macros is more efficient
  319. * than calling a function implemented using window_spill_function,
  320. * because the latter needs extra code to figure out the size of
  321. * the call to the spilling function.
  322. *
  323. * Example usage:
  324. *
  325. * .text
  326. * .align 4
  327. * .global some_function
  328. * .type some_function,@function
  329. * some_function:
  330. * entry a1, 16
  331. * :
  332. * :
  333. *
  334. * window_spill4 // Spill windows of some_function's callers; preserves a0..a3 only;
  335. * // to use window_spill{8,12} in this example function we'd have
  336. * // to increase space allocated by the entry instruction, because
  337. * // 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
  338. * // for call8/window_spill8 or call12/window_spill12 respectively.
  339. *
  340. * :
  341. *
  342. * retw
  343. *
  344. * window_spill_common // instantiates code used by window_spill4
  345. *
  346. *
  347. * On entry:
  348. * none (if window_spill4)
  349. * stack frame has enough space allocated for call8 (if window_spill8)
  350. * stack frame has enough space allocated for call12 (if window_spill12)
  351. * On exit:
  352. * a4..a15 clobbered (if window_spill4)
  353. * a8..a15 clobbered (if window_spill8)
  354. * a12..a15 clobbered (if window_spill12)
  355. * no caller windows are in live registers
  356. */
  357. .macro window_spill4
  358. #if XCHAL_HAVE_WINDOWED
  359. # if XCHAL_NUM_AREGS == 16
  360. movi a15, 0 // for 16-register files, no need to call to reach the end
  361. # elif XCHAL_NUM_AREGS == 32
  362. call4 .L__wdwspill_assist28 // call deep enough to clear out any live callers
  363. # elif XCHAL_NUM_AREGS == 64
  364. call4 .L__wdwspill_assist60 // call deep enough to clear out any live callers
  365. # endif
  366. #endif
  367. .endm // window_spill4
  368. .macro window_spill8
  369. #if XCHAL_HAVE_WINDOWED
  370. # if XCHAL_NUM_AREGS == 16
  371. movi a15, 0 // for 16-register files, no need to call to reach the end
  372. # elif XCHAL_NUM_AREGS == 32
  373. call8 .L__wdwspill_assist24 // call deep enough to clear out any live callers
  374. # elif XCHAL_NUM_AREGS == 64
  375. call8 .L__wdwspill_assist56 // call deep enough to clear out any live callers
  376. # endif
  377. #endif
  378. .endm // window_spill8
  379. .macro window_spill12
  380. #if XCHAL_HAVE_WINDOWED
  381. # if XCHAL_NUM_AREGS == 16
  382. movi a15, 0 // for 16-register files, no need to call to reach the end
  383. # elif XCHAL_NUM_AREGS == 32
  384. call12 .L__wdwspill_assist20 // call deep enough to clear out any live callers
  385. # elif XCHAL_NUM_AREGS == 64
  386. call12 .L__wdwspill_assist52 // call deep enough to clear out any live callers
  387. # endif
  388. #endif
  389. .endm // window_spill12
  390. /*----------------------------------------------------------------------
  391. * window_spill_function
  392. *
  393. * This macro outputs a function that will spill its caller's callers'
  394. * register windows to the stack. Eg. it could be used to implement
  395. * a version of xthal_window_spill() that works in non-privileged tasks.
  396. * This works for both privileged and non-privileged tasks.
  397. *
  398. * Typical usage:
  399. *
  400. * .text
  401. * .align 4
  402. * .global my_spill_function
  403. * .type my_spill_function,@function
  404. * my_spill_function:
  405. * window_spill_function
  406. *
  407. * On entry to resulting function:
  408. * none
  409. * On exit from resulting function:
  410. * none (no caller windows are in live registers)
  411. */
  412. .macro window_spill_function
  413. #if XCHAL_HAVE_WINDOWED
  414. # if XCHAL_NUM_AREGS == 32
  415. entry sp, 48
  416. bbci.l a0, 31, 1f // branch if called with call4
  417. bbsi.l a0, 30, 2f // branch if called with call12
  418. call8 .L__wdwspill_assist16 // called with call8, only need another 8
  419. retw
  420. 1: call12 .L__wdwspill_assist16 // called with call4, only need another 12
  421. retw
  422. 2: call4 .L__wdwspill_assist16 // called with call12, only need another 4
  423. retw
  424. # elif XCHAL_NUM_AREGS == 64
  425. entry sp, 48
  426. bbci.l a0, 31, 1f // branch if called with call4
  427. bbsi.l a0, 30, 2f // branch if called with call12
  428. call4 .L__wdwspill_assist52 // called with call8, only need a call4
  429. retw
  430. 1: call8 .L__wdwspill_assist52 // called with call4, only need a call8
  431. retw
  432. 2: call12 .L__wdwspill_assist40 // called with call12, can skip a call12
  433. retw
  434. # elif XCHAL_NUM_AREGS == 16
  435. entry sp, 16
  436. bbci.l a0, 31, 1f // branch if called with call4
  437. bbsi.l a0, 30, 2f // branch if called with call12
  438. movi a7, 0 // called with call8
  439. retw
  440. 1: movi a11, 0 // called with call4
  441. 2: retw // if called with call12, everything already spilled
  442. // movi a15, 0 // trick to spill all but the direct caller
  443. // j 1f
  444. // // The entry instruction is magical in the assembler (gets auto-aligned)
  445. // // so we have to jump to it to avoid falling through the padding.
  446. // // We need entry/retw to know where to return.
  447. //1: entry sp, 16
  448. // retw
  449. # else
  450. # error "unrecognized address register file size"
  451. # endif
  452. #endif /* XCHAL_HAVE_WINDOWED */
  453. window_spill_common
  454. .endm // window_spill_function
  455. /*----------------------------------------------------------------------
  456. * window_spill_common
  457. *
  458. * Common code used by any number of invocations of the window_spill##
  459. * and window_spill_function macros.
  460. *
  461. * Must be instantiated exactly once within a given assembly unit,
  462. * within call/j range of and same section as window_spill##
  463. * macro invocations for that assembly unit.
  464. * (Is automatically instantiated by the window_spill_function macro.)
  465. */
  466. .macro window_spill_common
  467. #if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
  468. .ifndef .L__wdwspill_defined
  469. # if XCHAL_NUM_AREGS >= 64
  470. .L__wdwspill_assist60:
  471. entry sp, 32
  472. call8 .L__wdwspill_assist52
  473. retw
  474. .L__wdwspill_assist56:
  475. entry sp, 16
  476. call4 .L__wdwspill_assist52
  477. retw
  478. .L__wdwspill_assist52:
  479. entry sp, 48
  480. call12 .L__wdwspill_assist40
  481. retw
  482. .L__wdwspill_assist40:
  483. entry sp, 48
  484. call12 .L__wdwspill_assist28
  485. retw
  486. # endif
  487. .L__wdwspill_assist28:
  488. entry sp, 48
  489. call12 .L__wdwspill_assist16
  490. retw
  491. .L__wdwspill_assist24:
  492. entry sp, 32
  493. call8 .L__wdwspill_assist16
  494. retw
  495. .L__wdwspill_assist20:
  496. entry sp, 16
  497. call4 .L__wdwspill_assist16
  498. retw
  499. .L__wdwspill_assist16:
  500. entry sp, 16
  501. movi a15, 0
  502. retw
  503. .set .L__wdwspill_defined, 1
  504. .endif
  505. #endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
  506. .endm // window_spill_common
  507. /*----------------------------------------------------------------------
  508. * beqi32
  509. *
  510. * macro implements version of beqi for arbitrary 32-bit immediate value
  511. *
  512. * beqi32 ax, ay, imm32, label
  513. *
  514. * Compares value in register ax with imm32 value and jumps to label if
  515. * equal. Clobbers register ay if needed
  516. *
  517. */
  518. .macro beqi32 ax, ay, imm, label
  519. .ifeq ((\imm-1) & ~7) // 1..8 ?
  520. beqi \ax, \imm, \label
  521. .else
  522. .ifeq (\imm+1) // -1 ?
  523. beqi \ax, \imm, \label
  524. .else
  525. .ifeq (\imm) // 0 ?
  526. beqz \ax, \label
  527. .else
  528. // We could also handle immediates 10,12,16,32,64,128,256
  529. // but it would be a long macro...
  530. movi \ay, \imm
  531. beq \ax, \ay, \label
  532. .endif
  533. .endif
  534. .endif
  535. .endm // beqi32
  536. /*----------------------------------------------------------------------
  537. * isync_retw_nop
  538. *
  539. * This macro must be invoked immediately after ISYNC if ISYNC
  540. * would otherwise be immediately followed by RETW (or other instruction
  541. * modifying WindowBase or WindowStart), in a context where
  542. * kernel vector mode may be selected, and level-one interrupts
  543. * and window overflows may be enabled, on an XEA1 configuration.
  544. *
  545. * On hardware with erratum "XEA1KWIN" (see <xtensa/core.h> for details),
  546. * XEA1 code must have at least one instruction between ISYNC and RETW if
  547. * run in kernel vector mode with interrupts and window overflows enabled.
  548. */
  549. .macro isync_retw_nop
  550. #if XCHAL_MAYHAVE_ERRATUM_XEA1KWIN
  551. nop
  552. #endif
  553. .endm
  554. /*----------------------------------------------------------------------
  555. * isync_erratum453
  556. *
  557. * This macro must be invoked at certain points in the code,
  558. * such as in exception and interrupt vectors in particular,
  559. * to work around erratum 453.
  560. */
  561. .macro isync_erratum453
  562. #if XCHAL_ERRATUM_453
  563. isync
  564. #endif
  565. .endm
  566. /*----------------------------------------------------------------------
  567. * abs
  568. *
  569. * implements abs on machines that do not have it configured
  570. */
  571. #if !XCHAL_HAVE_ABS
  572. .macro abs arr, ars
  573. .ifc \arr, \ars
  574. //src equal dest is less efficient
  575. bgez \arr, 1f
  576. neg \arr, \arr
  577. 1:
  578. .else
  579. neg \arr, \ars
  580. movgez \arr, \ars, \ars
  581. .endif
  582. .endm
  583. #endif /* !XCHAL_HAVE_ABS */
  584. /*----------------------------------------------------------------------
  585. * addx2
  586. *
  587. * implements addx2 on machines that do not have it configured
  588. *
  589. */
  590. #if !XCHAL_HAVE_ADDX
  591. .macro addx2 arr, ars, art
  592. .ifc \arr, \art
  593. .ifc \arr, \ars
  594. // addx2 a, a, a (not common)
  595. .err
  596. .else
  597. add \arr, \ars, \art
  598. add \arr, \ars, \art
  599. .endif
  600. .else
  601. //addx2 a, b, c
  602. //addx2 a, a, b
  603. //addx2 a, b, b
  604. slli \arr, \ars, 1
  605. add \arr, \arr, \art
  606. .endif
  607. .endm
  608. #endif /* !XCHAL_HAVE_ADDX */
  609. /*----------------------------------------------------------------------
  610. * addx4
  611. *
  612. * implements addx4 on machines that do not have it configured
  613. *
  614. */
  615. #if !XCHAL_HAVE_ADDX
  616. .macro addx4 arr, ars, art
  617. .ifc \arr, \art
  618. .ifc \arr, \ars
  619. // addx4 a, a, a (not common)
  620. .err
  621. .else
  622. //# addx4 a, b, a
  623. add \arr, \ars, \art
  624. add \arr, \ars, \art
  625. add \arr, \ars, \art
  626. add \arr, \ars, \art
  627. .endif
  628. .else
  629. //addx4 a, b, c
  630. //addx4 a, a, b
  631. //addx4 a, b, b
  632. slli \arr, \ars, 2
  633. add \arr, \arr, \art
  634. .endif
  635. .endm
  636. #endif /* !XCHAL_HAVE_ADDX */
  637. /*----------------------------------------------------------------------
  638. * addx8
  639. *
  640. * implements addx8 on machines that do not have it configured
  641. *
  642. */
  643. #if !XCHAL_HAVE_ADDX
  644. .macro addx8 arr, ars, art
  645. .ifc \arr, \art
  646. .ifc \arr, \ars
  647. //addx8 a, a, a (not common)
  648. .err
  649. .else
  650. //addx8 a, b, a
  651. add \arr, \ars, \art
  652. add \arr, \ars, \art
  653. add \arr, \ars, \art
  654. add \arr, \ars, \art
  655. add \arr, \ars, \art
  656. add \arr, \ars, \art
  657. add \arr, \ars, \art
  658. add \arr, \ars, \art
  659. .endif
  660. .else
  661. //addx8 a, b, c
  662. //addx8 a, a, b
  663. //addx8 a, b, b
  664. slli \arr, \ars, 3
  665. add \arr, \arr, \art
  666. .endif
  667. .endm
  668. #endif /* !XCHAL_HAVE_ADDX */
  669. /*----------------------------------------------------------------------
  670. * rfe_rfue
  671. *
  672. * Maps to RFUE on XEA1, and RFE on XEA2. No mapping on XEAX.
  673. */
  674. #if XCHAL_HAVE_XEA1
  675. .macro rfe_rfue
  676. rfue
  677. .endm
  678. #elif XCHAL_HAVE_XEA2
  679. .macro rfe_rfue
  680. rfe
  681. .endm
  682. #endif
  683. /*----------------------------------------------------------------------
  684. * abi_entry
  685. *
  686. * Generate proper function entry sequence for the current ABI
  687. * (windowed or call0). Takes care of allocating stack space (up to 1kB)
  688. * and saving the return PC, if necessary. The corresponding abi_return
  689. * macro does the corresponding stack deallocation and restoring return PC.
  690. *
  691. * Parameters are:
  692. *
  693. * locsize Number of bytes to allocate on the stack
  694. * for local variables (and for args to pass to
  695. * callees, if any calls are made). Defaults to zero.
  696. * The macro rounds this up to a multiple of 16.
  697. * NOTE: large values are allowed (e.g. up to 1 GB).
  698. *
  699. * callsize Maximum call size made by this function.
  700. * Leave zero (default) for leaf functions, i.e. if
  701. * this function makes no calls to other functions.
  702. * Otherwise must be set to 4, 8, or 12 according
  703. * to whether the "largest" call made is a call[x]4,
  704. * call[x]8, or call[x]12 (for call0 ABI, it makes
  705. * no difference whether this is set to 4, 8 or 12,
  706. * but it must be set to one of these values).
  707. *
  708. * NOTE: It is up to the caller to align the entry point, declare the
  709. * function symbol, make it global, etc.
  710. *
  711. * NOTE: This macro relies on assembler relaxation for large values
  712. * of locsize. It might not work with the no-transform directive.
  713. * NOTE: For the call0 ABI, this macro ensures SP is allocated or
  714. * de-allocated cleanly, i.e. without temporarily allocating too much
  715. * (or allocating negatively!) due to addi relaxation.
  716. *
  717. * NOTE: Generating the proper sequence and register allocation for
  718. * making calls in an ABI independent manner is a separate topic not
  719. * covered by this macro.
  720. *
  721. * NOTE: To access arguments, you can't use a fixed offset from SP.
  722. * The offset depends on the ABI, whether the function is leaf, etc.
  723. * The simplest method is probably to use the .locsz symbol, which
  724. * is set by this macro to the actual number of bytes allocated on
  725. * the stack, in other words, to the offset from SP to the arguments.
  726. * E.g. for a function whose arguments are all 32-bit integers, you
  727. * can get the 7th and 8th arguments (1st and 2nd args stored on stack)
  728. * using:
  729. * l32i a2, sp, .locsz
  730. * l32i a3, sp, .locsz+4
  731. * (this example works as long as locsize is under L32I's offset limit
  732. * of 1020 minus up to 48 bytes of ABI-specific stack usage;
  733. * otherwise you might first need to do "addi a?, sp, .locsz"
  734. * or similar sequence).
  735. *
  736. * NOTE: For call0 ABI, this macro (and abi_return) may clobber a9
  737. * (a caller-saved register).
  738. *
  739. * Examples:
  740. * abi_entry
  741. * abi_entry 5
  742. * abi_entry 22, 8
  743. * abi_entry 0, 4
  744. */
  745. /*
  746. * Compute .locsz and .callsz without emitting any instructions.
  747. * Used by both abi_entry and abi_return.
  748. * Assumes locsize >= 0.
  749. */
  750. .macro abi_entry_size locsize=0, callsize=0
  751. #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
  752. .ifeq \callsize
  753. .set .callsz, 16
  754. .else
  755. .ifeq \callsize-4
  756. .set .callsz, 16
  757. .else
  758. .ifeq \callsize-8
  759. .set .callsz, 32
  760. .else
  761. .ifeq \callsize-12
  762. .set .callsz, 48
  763. .else
  764. .error "abi_entry: invalid call size \callsize"
  765. .endif
  766. .endif
  767. .endif
  768. .endif
  769. .set .locsz, .callsz + ((\locsize + 15) & -16)
  770. #else
  771. .set .callsz, \callsize
  772. .if .callsz /* if calls, need space for return PC */
  773. .set .locsz, (\locsize + 4 + 15) & -16
  774. .else
  775. .set .locsz, (\locsize + 15) & -16
  776. .endif
  777. #endif
  778. .endm
  779. .macro abi_entry locsize=0, callsize=0
  780. .iflt \locsize
  781. .error "abi_entry: invalid negative size of locals (\locsize)"
  782. .endif
  783. abi_entry_size \locsize, \callsize
  784. #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
  785. .ifgt .locsz - 32760 /* .locsz > 32760 (ENTRY's max range)? */
  786. /* Funky computation to try to have assembler use addmi efficiently if possible: */
  787. entry sp, 0x7F00 + (.locsz & 0xF0)
  788. addi a12, sp, - ((.locsz & -0x100) - 0x7F00)
  789. movsp sp, a12
  790. .else
  791. entry sp, .locsz
  792. .endif
  793. #else
  794. .if .locsz
  795. .ifle .locsz - 128 /* if locsz <= 128 */
  796. addi sp, sp, -.locsz
  797. .if .callsz
  798. s32i a0, sp, .locsz - 4
  799. .endif
  800. .elseif .callsz /* locsz > 128, with calls: */
  801. movi a9, .locsz - 16 /* note: a9 is caller-saved */
  802. addi sp, sp, -16
  803. s32i a0, sp, 12
  804. sub sp, sp, a9
  805. .else /* locsz > 128, no calls: */
  806. movi a9, .locsz
  807. sub sp, sp, a9
  808. .endif /* end */
  809. .endif
  810. #endif
  811. .endm
  812. /*----------------------------------------------------------------------
  813. * abi_return
  814. *
  815. * Generate proper function exit sequence for the current ABI
  816. * (windowed or call0). Takes care of freeing stack space and
  817. * restoring the return PC, if necessary.
  818. * NOTE: This macro MUST be invoked following a corresponding
  819. * abi_entry macro invocation. For call0 ABI in particular,
  820. * all stack and PC restoration are done according to the last
  821. * abi_entry macro invoked before this macro in the assembly file.
  822. *
  823. * Normally this macro takes no arguments. However to allow
  824. * for placing abi_return *before* abi_entry (as must be done
  825. * for some highly optimized assembly), it optionally takes
  826. * exactly the same arguments as abi_entry.
  827. */
  828. .macro abi_return locsize=-1, callsize=0
  829. .ifge \locsize
  830. abi_entry_size \locsize, \callsize
  831. .endif
  832. #if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
  833. retw
  834. #else
  835. .if .locsz
  836. .iflt .locsz - 128 /* if locsz < 128 */
  837. .if .callsz
  838. l32i a0, sp, .locsz - 4
  839. .endif
  840. addi sp, sp, .locsz
  841. .elseif .callsz /* locsz >= 128, with calls: */
  842. addi a9, sp, .locsz - 16
  843. l32i a0, a9, 12
  844. addi sp, a9, 16
  845. .else /* locsz >= 128, no calls: */
  846. movi a9, .locsz
  847. add sp, sp, a9
  848. .endif /* end */
  849. .endif
  850. ret
  851. #endif
  852. .endm
  853. /*
  854. * HW erratum fixes.
  855. */
  856. .macro hw_erratum_487_fix
  857. #if defined XSHAL_ERRATUM_487_FIX
  858. isync
  859. #endif
  860. .endm
  861. #endif /*XTENSA_COREASM_H*/