core_feature_dsp.h 855 KB


  1. /*
  2. * Copyright (c) 2019 Nuclei Limited. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef __CORE_FEATURE_DSP__
  19. #define __CORE_FEATURE_DSP__
  20. /*!
  21. * @file core_feature_dsp.h
  22. * @brief DSP feature API header file for Nuclei N/NX Core
  23. */
  24. /*
  25. * DSP Feature Configuration Macro:
  26. * 1. __DSP_PRESENT: Define whether Digital Signal Processing Unit(DSP) is present or not
  27. * * 0: Not present
  28. * * 1: Present
  29. */
  30. #ifdef __cplusplus
  31. extern "C" {
  32. #endif
  33. #include "core_feature_base.h"
  34. #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1)
  35. #if defined(__INC_INTRINSIC_API) && (__INC_INTRINSIC_API == 1)
  36. #if defined(__zcc__)
  37. #include <rvp_intrinsic.h>
  38. #else
  39. #if !defined(__ICCRISCV__) && !defined(__llvm__)
  40. #include <rvp_intrinsic.h>
  41. #endif
  42. #endif
  43. #endif
  44. #ifndef __ICCRISCV__
  45. /* ########################### CPU SIMD DSP Intrinsic Functions ########################### */
  46. /**
  47. * \defgroup NMSIS_Core_DSP_Intrinsic Intrinsic Functions for SIMD Instructions
  48. * \ingroup NMSIS_Core
  49. * \brief Functions that generate RISC-V DSP SIMD instructions.
  50. * \details
  51. *
  52. * The following functions generate specified RISC-V SIMD instructions that cannot be directly accessed by compiler.
  53. * * **DSP ISA Extension Instruction Summary**
  54. * + **Shorthand Definitions**
  55. * - r.H == r.H1: r[31:16], r.L == r.H0: r[15:0]
  56. * - r.B3: r[31:24], r.B2: r[23:16], r.B1: r[15:8], r.B0: r[7:0]
  57. * - r.B[x]: r[(x*8+7):(x*8+0)]
  58. * - r.H[x]: r[(x*16+15):(x*16+0)]
  59. * - r.W[x]: r[(x*32+31):(x*32+0)]
  60. * - r[xU]: the upper 32-bit of a 64-bit number; xU represents the GPR number that contains this upper part 32-bit value.
  61. * - r[xL]: the lower 32-bit of a 64-bit number; xL represents the GPR number that contains this lower part 32-bit value.
  62. * - r[xU].r[xL]: a 64-bit number that is formed from a pair of GPRs.
  63. * - s>>: signed arithmetic right shift
  64. * - u>>: unsigned logical right shift
  65. * - SAT.Qn(): Saturate to the range of [-2^n, 2^n-1], if saturation happens, set PSW.OV.
  66. * - SAT.Um(): Saturate to the range of [0, 2^m-1], if saturation happens, set PSW.OV.
  67. * - RUND(): Indicate `rounding`, i.e., add 1 to the most significant discarded bit for right shift or MSW-type multiplication instructions.
  68. * - Sign or Zero Extending functions:
  69. * - SEm(data): Sign-Extend data to m-bit.
  70. * - ZEm(data): Zero-Extend data to m-bit.
  71. * - ABS(x): Calculate the absolute value of `x`.
  72. * - CONCAT(x,y): Concatenate `x` and `y` to form a value.
  73. * - u<: Unsigned less than comparison.
  74. * - u<=: Unsigned less than or equal comparison.
  75. * - u>: Unsigned greater than comparison.
  76. * - s*: Signed multiplication.
  77. * - u*: Unsigned multiplication.
  78. *
  79. * @{
  80. */
  81. /** @} */ /* End of Doxygen Group NMSIS_Core_DSP_Intrinsic */
  82. /**
  83. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS SIMD Data Processing Instructions
  84. * \ingroup NMSIS_Core_DSP_Intrinsic
  85. * \brief SIMD Data Processing Instructions
  86. * \details
  87. */
  88. /**
  89. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB SIMD 16-bit Add/Subtract Instructions
  90. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  91. * \brief SIMD 16-bit Add/Subtract Instructions
  92. * \details
  93. * Based on the combination of the types of the two 16-bit arithmetic operations, the SIMD 16-bit
  94. * add/subtract instructions can be classified into 6 main categories: Addition (two 16-bit addition),
  95. * Subtraction (two 16-bit subtraction), Crossed Add & Sub (one addition and one subtraction),
  96. * Crossed Sub & Add (one subtraction and one addition), Straight Add & Sub (one addition and one
  97. * subtraction), and Straight Sub & Add (one subtraction and one addition).
  98. * Based on the way of how an overflow condition is handled, the SIMD 16-bit add/subtract
  99. * instructions can be classified into 5 groups: Wrap-around (dropping overflow), Signed Halving
  100. * (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed Saturation (clipping overflow),
  101. * and Unsigned Saturation.
  102. * Together, there are 30 SIMD 16-bit add/subtract instructions.
  103. */
  104. /**
  105. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB SIMD 8-bit Addition & Subtraction Instructions
  106. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  107. * \brief SIMD 8-bit Addition & Subtraction Instructions
  108. * \details
  109. * Based on the types of the four 8-bit arithmetic operations, the SIMD 8-bit add/subtract instructions
  110. * can be classified into 2 main categories: Addition (four 8-bit addition), and Subtraction (four 8-bit
  111. * subtraction).
  112. * Based on the way of how an overflow condition is handled for signed or unsigned operation, the
  113. * SIMD 8-bit add/subtract instructions can be classified into 5 groups: Wrap-around (dropping
  114. * overflow), Signed Halving (keeping overflow by dropping 1 LSB bit), Unsigned Halving, Signed
  115. * Saturation (clipping overflow), and Unsigned Saturation.
  116. * Together, there are 10 SIMD 8-bit add/subtract instructions.
  117. */
  118. /**
  119. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT SIMD 16-bit Shift Instructions
  120. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  121. * \brief SIMD 16-bit Shift Instructions
  122. * \details
  123. * there are 14 SIMD 16-bit shift instructions.
  124. */
  125. /**
  126. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT SIMD 8-bit Shift Instructions
  127. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  128. * \brief SIMD 8-bit Shift Instructions
  129. * \details
  130. * there are 14 SIMD 8-bit shift instructions.
  131. */
  132. /**
  133. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP SIMD 16-bit Compare Instructions
  134. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  135. * \brief SIMD 16-bit Compare Instructions
  136. * \details
  137. * there are 5 SIMD 16-bit Compare instructions.
  138. */
  139. /**
  140. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP SIMD 8-bit Compare Instructions
  141. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  142. * \brief SIMD 8-bit Compare Instructions
  143. * \details
  144. * there are 5 SIMD 8-bit Compare instructions.
  145. */
  146. /**
  147. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY SIMD 16-bit Multiply Instructions
  148. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  149. * \brief SIMD 16-bit Multiply Instructions
  150. * \details
  151. * there are 6 SIMD 16-bit Multiply instructions.
  152. */
  153. /**
  154. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY SIMD 8-bit Multiply Instructions
  155. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  156. * \brief SIMD 8-bit Multiply Instructions
  157. * \details
  158. * there are 6 SIMD 8-bit Multiply instructions.
  159. */
  160. /**
  161. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC SIMD 16-bit Miscellaneous Instructions
  162. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  163. * \brief SIMD 16-bit Miscellaneous Instructions
  164. * \details
  165. * there are 10 SIMD 16-bit Misc instructions.
  166. */
  167. /**
  168. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC SIMD 8-bit Miscellaneous Instructions
  169. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  170. * \brief SIMD 8-bit Miscellaneous Instructions
  171. * \details
  172. * there are 10 SIMD 8-bit Miscellaneous instructions.
  173. */
  174. /**
  175. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK SIMD 8-bit Unpacking Instructions
  176. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_DATA_PROCESS
  177. * \brief SIMD 8-bit Unpacking Instructions
  178. * \details
  179. * there are 8 SIMD 8-bit Unpacking instructions.
  180. */
  181. /**
  182. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD Non-SIMD Instructions
  183. * \ingroup NMSIS_Core_DSP_Intrinsic
  184. * \brief Non-SIMD Instructions
  185. * \details
  186. */
  187. /**
  188. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU Non-SIMD Q15 saturation ALU Instructions
  189. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  190. * \brief Non-SIMD Q15 saturation ALU Instructions
  191. * \details
  192. * there are 7 Non-SIMD Q15 saturation ALU Instructions
  193. */
  194. /**
  195. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU Non-SIMD Q31 saturation ALU Instructions
  196. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  197. * \brief Non-SIMD Q31 saturation ALU Instructions
  198. * \details
  199. * there are Non-SIMD Q31 saturation ALU Instructions
  200. */
  201. /**
  202. * \defgroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION 32-bit Computation Instructions
  203. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  204. * \brief 32-bit Computation Instructions
  205. * \details
  206. * there are 8 32-bit Computation Instructions
  207. */
  208. /**
  209. * \defgroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC OV (Overflow) flag Set/Clear Instructions
  210. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  211. * \brief OV (Overflow) flag Set/Clear Instructions
  212. * \details
  213. * The following table lists the user instructions related to Overflow (OV) flag manipulation. There are 2 OV (Overflow) flag Set/Clear Instructions.
  214. */
  215. /**
  216. * \defgroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC Non-SIMD Miscellaneous Instructions
  217. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD
  218. * \brief Non-SIMD Miscellaneous Instructions
  219. * \details
  220. * There are 13 Miscellaneous Instructions here.
  221. */
  222. /**
  223. * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS Partial-SIMD Data Processing Instructions
  224. * \ingroup NMSIS_Core_DSP_Intrinsic
  225. * \brief Partial-SIMD Data Processing Instructions
  226. * \details
  227. */
  228. /**
  229. * \defgroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK SIMD 16-bit Packing Instructions
  230. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  231. * \brief SIMD 16-bit Packing Instructions
  232. * \details
  233. * There are 4 SIMD 16-bit Packing Instructions.
  234. */
  235. /**
  236. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC Signed MSW 32x32 Multiply and Add Instructions
  237. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  238. * \brief Signed MSW 32x32 Multiply and Add Instructions
  239. * \details
  240. * there are 8 Signed MSW 32x32 Multiply and Add Instructions
  241. */
  242. /**
  243. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC Signed MSW 32x16 Multiply and Add Instructions
  244. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  245. * \brief Signed MSW 32x16 Multiply and Add Instructions
  246. * \details
  247. * there are 15 Signed MSW 32x16 Multiply and Add Instructions
  248. */
  249. /**
  250. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB Signed 16-bit Multiply 32-bit Add/Subtract Instructions
  251. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  252. * \brief Signed 16-bit Multiply 32-bit Add/Subtract Instructions
  253. * \details
  254. * there are 18 Signed 16-bit Multiply 32-bit Add/Subtract Instructions
  255. */
  256. /**
  257. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply 64-bit Add/Subtract Instructions
  258. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  259. * \brief Signed 16-bit Multiply 64-bit Add/Subtract Instructions
  260. * \details
  261. * These are the Signed 16-bit Multiply 64-bit Add/Subtract Instructions.
  262. */
  263. /**
  264. * \defgroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC Partial-SIMD Miscellaneous Instructions
  265. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  266. * \brief Partial-SIMD Miscellaneous Instructions
  267. * \details
  268. * there are 7 Partial-SIMD Miscellaneous Instructions
  269. */
  270. /**
  271. * \defgroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD 8-bit Multiply with 32-bit Add Instructions
  272. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_DATA_PROCESS
  273. * \brief 8-bit Multiply with 32-bit Add Instructions
  274. * \details
  275. * there are 3 8-bit Multiply with 32-bit Add Instructions
  276. */
  277. /**
  278. * \defgroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE 64-bit Profile Instructions
  279. * \ingroup NMSIS_Core_DSP_Intrinsic
  280. * \brief 64-bit Profile Instructions
  281. * \details
  282. */
  283. /**
  284. * \defgroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB 64-bit Addition & Subtraction Instructions
  285. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
  286. * \brief 64-bit Addition & Subtraction Instructions
  287. * \details
  288. * there are 10 64-bit Addition & Subtraction Instructions.
  289. */
  290. /**
  291. * \defgroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB 32-bit Multiply with 64-bit Add/Subtract Instructions
  292. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
  293. * \brief 32-bit Multiply with 64-bit Add/Subtract Instructions
  294. * \details
  295. * there are 32-bit Multiply 64-bit Add/Subtract Instructions
  296. */
  297. /**
  298. * \defgroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
  299. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_PROFILE
  300. * \brief Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
  301. * \details
  302. * there are 10 Signed 16-bit Multiply with 64-bit Add/Subtract Instructions
  303. */
  304. /**
  305. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY RV64 Only Instructions
  306. * \ingroup NMSIS_Core_DSP_Intrinsic
  307. * \brief RV64 Only Instructions
  308. * \details
  309. */
  310. /**
  311. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB (RV64 Only) SIMD 32-bit Add/Subtract Instructions
  312. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  313. * \brief (RV64 Only) SIMD 32-bit Add/Subtract Instructions
  314. * \details
  315. * The following tables list instructions that are only present in RV64.
  316. * There are 30 SIMD 32-bit addition or subtraction instructions.
  317. */
  318. /**
  319. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT (RV64 Only) SIMD 32-bit Shift Instructions
  320. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  321. * \brief (RV64 Only) SIMD 32-bit Shift Instructions
  322. * \details
  323. * there are 14 (RV64 Only) SIMD 32-bit Shift Instructions
  324. */
  325. /**
  326. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC (RV64 Only) SIMD 32-bit Miscellaneous Instructions
  327. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  328. * \brief (RV64 Only) SIMD 32-bit Miscellaneous Instructions
  329. * \details
  330. * there are 5 (RV64 Only) SIMD 32-bit Miscellaneous Instructions
  331. */
  332. /**
  333. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT (RV64 Only) SIMD Q15 Saturating Multiply Instructions
  334. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  335. * \brief (RV64 Only) SIMD Q15 Saturating Multiply Instructions
  336. * \details
  337. * there are 9 (RV64 Only) SIMD Q15 saturating Multiply Instructions
  338. */
  339. /**
  340. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT (RV64 Only) 32-bit Multiply Instructions
  341. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  342. * \brief (RV64 Only) 32-bit Multiply Instructions
  343. * \details
  344. * There are 3 (RV64 Only) 32-bit Multiply Instructions.
  345. */
  346. /**
  347. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD (RV64 Only) 32-bit Multiply & Add Instructions
  348. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  349. * \brief (RV64 Only) 32-bit Multiply & Add Instructions
  350. * \details
  351. * there are 3 (RV64 Only) 32-bit Multiply & Add Instructions
  352. */
  353. /**
  354. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC (RV64 Only) 32-bit Parallel Multiply & Add Instructions
  355. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  356. * \brief (RV64 Only) 32-bit Parallel Multiply & Add Instructions
  357. * \details
  358. * there are 12 (RV64 Only) 32-bit Parallel Multiply & Add Instructions
  359. */
  360. /**
  361. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT (RV64 Only) Non-SIMD 32-bit Shift Instructions
  362. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  363. * \brief (RV64 Only) Non-SIMD 32-bit Shift Instructions
  364. * \details
  365. * There is 1 (RV64 Only) Non-SIMD 32-bit Shift Instruction.
  366. */
  367. /**
  368. * \defgroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK 32-bit Packing Instructions
  369. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_ONLY
  370. * \brief 32-bit Packing Instructions
  371. * \details
  372. * There are four 32-bit packing instructions here
  373. */
  374. /* ===== Inline Function Start for 3.1. ADD8 ===== */
  375. /**
  376. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  377. * \brief ADD8 (SIMD 8-bit Addition)
  378. * \details
  379. * **Type**: SIMD
  380. *
  381. * **Syntax**:\n
  382. * ~~~
  383. * ADD8 Rd, Rs1, Rs2
  384. * ~~~
  385. *
  386. * **Purpose**:\n
  387. * Do 8-bit integer element additions simultaneously.
  388. *
  389. * **Description**:\n
  390. * This instruction adds the 8-bit integer elements in Rs1 with the 8-bit integer elements
  391. * in Rs2, and then writes the 8-bit element results to Rd.
  392. *
  393. * **Note**:\n
  394. * This instruction can be used for either signed or unsigned addition.
  395. *
  396. * **Operations**:\n
  397. * ~~~
  398. * Rd.B[x] = Rs1.B[x] + Rs2.B[x];
  399. * for RV32: x=3...0,
  400. * for RV64: x=7...0
  401. * ~~~
  402. *
  403. * \param [in] a unsigned long type of value stored in a
  404. * \param [in] b unsigned long type of value stored in b
  405. * \return value stored in unsigned long type
  406. */
  407. __STATIC_FORCEINLINE unsigned long __RV_ADD8(unsigned long a, unsigned long b)
  408. {
  409. unsigned long byte_sums; /* receives Rd, i.e. Rd.B[x] = Rs1.B[x] + Rs2.B[x] for every byte lane */
  410. __ASM volatile("add8 %0, %1, %2" : "=r"(byte_sums) : "r"(a), "r"(b));
  411. return byte_sums;
  412. }
  413. /* ===== Inline Function End for 3.1. ADD8 ===== */
  414. /* ===== Inline Function Start for 3.2. ADD16 ===== */
  415. /**
  416. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  417. * \brief ADD16 (SIMD 16-bit Addition)
  418. * \details
  419. * **Type**: SIMD
  420. *
  421. * **Syntax**:\n
  422. * ~~~
  423. * ADD16 Rd, Rs1, Rs2
  424. * ~~~
  425. *
  426. * **Purpose**:\n
  427. * Do 16-bit integer element additions simultaneously.
  428. *
  429. * **Description**:\n
  430. * This instruction adds the 16-bit integer elements in Rs1 with the 16-bit integer
  431. * elements in Rs2, and then writes the 16-bit element results to Rd.
  432. *
  433. * **Note**:\n
  434. * This instruction can be used for either signed or unsigned addition.
  435. *
  436. * **Operations**:\n
  437. * ~~~
  438. * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
  439. * for RV32: x=1...0,
  440. * for RV64: x=3...0
  441. * ~~~
  442. *
  443. * \param [in] a unsigned long type of value stored in a
  444. * \param [in] b unsigned long type of value stored in b
  445. * \return value stored in unsigned long type
  446. */
  447. __STATIC_FORCEINLINE unsigned long __RV_ADD16(unsigned long a, unsigned long b)
  448. {
  449. unsigned long half_sums; /* receives Rd, i.e. Rd.H[x] = Rs1.H[x] + Rs2.H[x] for every halfword lane */
  450. __ASM volatile("add16 %0, %1, %2" : "=r"(half_sums) : "r"(a), "r"(b));
  451. return half_sums;
  452. }
  453. /* ===== Inline Function End for 3.2. ADD16 ===== */
  454. /* ===== Inline Function Start for 3.3. ADD64 ===== */
  455. /**
  456. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  457. * \brief ADD64 (64-bit Addition)
  458. * \details
  459. * **Type**: 64-bit Profile
  460. *
  461. * **Syntax**:\n
  462. * ~~~
  463. * ADD64 Rd, Rs1, Rs2
  464. * ~~~
  465. *
  466. * **Purpose**:\n
  467. * Add two 64-bit signed or unsigned integers.
  468. *
  469. * **RV32 Description**:\n
  470. * This instruction adds the 64-bit integer of an even/odd pair of registers specified
  471. * by Rs1(4,1) with the 64-bit integer of an even/odd pair of registers specified by Rs2(4,1), and then
  472. * writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
  473. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  474. * pair includes register 2d and 2d+1.
  475. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  476. * of the pair contains the low 32-bit of the result.
  477. *
  478. * **RV64 Description**:\n
  479. * This instruction has the same behavior as the ADD instruction in RV64I.
  480. *
  481. * **Note**:\n
  482. * This instruction can be used for either signed or unsigned addition.
  483. *
  484. * **Operations**:\n
  485. * ~~~
  486. * RV32:
  487. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  488. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  489. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  490. * R[t_H].R[t_L] = R[a_H].R[a_L] + R[b_H].R[b_L];
  491. * RV64:
  492. * Rd = Rs1 + Rs2;
  493. * ~~~
  494. *
  495. * \param [in] a unsigned long long type of value stored in a
  496. * \param [in] b unsigned long long type of value stored in b
  497. * \return value stored in unsigned long long type
  498. */
  499. __STATIC_FORCEINLINE unsigned long long __RV_ADD64(unsigned long long a, unsigned long long b)
  500. {
  501. unsigned long long sum64; /* 64-bit sum of a and b as produced by ADD64 */
  502. __ASM volatile("add64 %0, %1, %2" : "=r"(sum64) : "r"(a), "r"(b));
  503. return sum64;
  504. }
  505. /* ===== Inline Function End for 3.3. ADD64 ===== */
  506. /* ===== Inline Function Start for 3.4. AVE ===== */
  507. /**
  508. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  509. * \brief AVE (Average with Rounding)
  510. * \details
  511. * **Type**: DSP
  512. *
  513. * **Syntax**:\n
  514. * ~~~
  515. * AVE Rd, Rs1, Rs2
  516. * ~~~
  517. *
  518. * **Purpose**:\n
  519. * Calculate the average of the contents of two general registers.
  520. *
  521. * **Description**:\n
  522. * This instruction calculates the average value of two signed integers stored in Rs1 and
  523. * Rs2, rounds up a half-integer result to the nearest integer, and writes the result to Rd.
  524. *
  525. * **Operations**:\n
  526. * ~~~
  527. * Sum = CONCAT(Rs1[MSB],Rs1[MSB:0]) + CONCAT(Rs2[MSB],Rs2[MSB:0]) + 1;
  528. * Rd = Sum[(MSB+1):1];
  529. * for RV32: MSB=31,
  530. * for RV64: MSB=63
  531. * ~~~
  532. *
  533. * \param [in] a long type of value stored in a
  534. * \param [in] b long type of value stored in b
  535. * \return value stored in long type
  536. */
  537. __STATIC_FORCEINLINE long __RV_AVE(long a, long b)
  538. {
  539. long mean; /* receives Rd = (a + b + 1) >> 1, formed with one extra sign bit per the AVE operation */
  540. __ASM volatile("ave %0, %1, %2" : "=r"(mean) : "r"(a), "r"(b));
  541. return mean;
  542. }
  543. /* ===== Inline Function End for 3.4. AVE ===== */
  544. /* ===== Inline Function Start for 3.5. BITREV ===== */
  545. /**
  546. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  547. * \brief BITREV (Bit Reverse)
  548. * \details
  549. * **Type**: DSP
  550. *
  551. * **Syntax**:\n
  552. * ~~~
  553. * BITREV Rd, Rs1, Rs2
  554. * ~~~
  555. *
  556. * **Purpose**:\n
  557. * Reverse the bit positions of the source operand within a specified width starting from bit
  558. * 0. The reversed width is a variable from a GPR.
  559. *
  560. * **Description**:\n
  561. * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
  562. * is calculated as Rs2[4:0]+1 (RV32) or Rs2[5:0]+1 (RV64). The upper bits beyond the reversed width
  563. * are filled with zeros. After the bit reverse operation, the result is written to Rd.
  564. *
  565. * **Operations**:\n
  566. * ~~~
  567. * msb = Rs2[4:0]; (for RV32)
  568. * msb = Rs2[5:0]; (for RV64)
  569. * rev[0:msb] = Rs1[msb:0];
  570. * Rd = ZE(rev[msb:0]);
  571. * ~~~
  572. *
  573. * \param [in] a unsigned long type of value stored in a
  574. * \param [in] b unsigned long type of value stored in b
  575. * \return value stored in unsigned long type
  576. */
  577. __STATIC_FORCEINLINE unsigned long __RV_BITREV(unsigned long a, unsigned long b)
  578. {
  579. unsigned long reversed; /* bits [msb:0] of a in reverse order, zero-extended; msb comes from the low bits of b */
  580. __ASM volatile("bitrev %0, %1, %2" : "=r"(reversed) : "r"(a), "r"(b));
  581. return reversed;
  582. }
  583. /* ===== Inline Function End for 3.5. BITREV ===== */
  584. /* ===== Inline Function Start for 3.6. BITREVI ===== */
  585. /**
  586. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  587. * \brief BITREVI (Bit Reverse Immediate)
  588. * \details
  589. * **Type**: DSP
  590. *
  591. * **Syntax**:\n
  592. * ~~~
  593. * (RV32) BITREVI Rd, Rs1, imm[4:0]
  594. * (RV64) BITREVI Rd, Rs1, imm[5:0]
  595. * ~~~
  596. *
  597. * **Purpose**:\n
  598. * Reverse the bit positions of the source operand within a specified width starting from bit
  599. * 0. The reversed width is an immediate value.
  600. *
  601. * **Description**:\n
  602. * This instruction reverses the bit positions of the content of Rs1. The reversed bit width
  603. * is calculated as imm[4:0]+1 (RV32) or imm[5:0]+1 (RV64). The upper bits beyond the reversed width
  604. * are filled with zeros. After the bit reverse operation, the result is written to Rd.
  605. *
  606. * **Operations**:\n
  607. * ~~~
  608. * msb = imm[4:0]; (RV32)
  609. * msb = imm[5:0]; (RV64)
  610. * rev[0:msb] = Rs1[msb:0];
  611. * Rd = ZE(rev[msb:0]);
  612. * ~~~
  613. *
  614. * \param [in] a unsigned long type of value stored in a
  615. * \param [in] b immediate value: must be a compile-time constant, imm[4:0] (RV32) or imm[5:0] (RV64)
  616. * \return value stored in unsigned long type
  617. */
  618. #define __RV_BITREVI(a, b) /* a: value to bit-reverse; b: integer constant passed via the "K" immediate constraint */ \
  619. ({ \
  620. unsigned long __rv_bitrevi_rs1 = (unsigned long)(a); /* evaluate `a` once, before any other local exists, so a caller variable named `result` is not shadowed */ \
  621. unsigned long __rv_bitrevi_rd; /* receives Rd: the reversed, zero-extended bit field */ \
  622. __ASM volatile("bitrevi %0, %1, %2" : "=r"(__rv_bitrevi_rd) : "r"(__rv_bitrevi_rs1), "K"(b)); \
  623. __rv_bitrevi_rd; /* value of the statement expression */ \
  624. })
  625. /* ===== Inline Function End for 3.6. BITREVI ===== */
  626. /* ===== Inline Function Start for 3.7. BPICK ===== */
  627. /**
  628. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  629. * \brief BPICK (Bit-wise Pick)
  630. * \details
  631. * **Type**: DSP
  632. *
  633. * **Syntax**:\n
  634. * ~~~
  635. * BPICK Rd, Rs1, Rs2, Rc
  636. * ~~~
  637. *
  638. * **Purpose**:\n
  639. * Select from two source operands based on a bit mask in the third operand.
  640. *
  641. * **Description**:\n
  642. * This instruction selects individual bits from Rs1 or Rs2, based on the bit mask value in
  643. * Rc. If a bit in Rc is 1, the corresponding bit is from Rs1; otherwise, the corresponding bit is from Rs2.
  644. * The selection results are written to Rd.
  645. *
  646. * **Operations**:\n
  647. * ~~~
  648. * Rd[x] = Rc[x]? Rs1[x] : Rs2[x];
  649. * for RV32, x=31...0
  650. * for RV64, x=63...0
  651. * ~~~
  652. *
  653. * \param [in] a unsigned long type of value stored in a
  654. * \param [in] b unsigned long type of value stored in b
  655. * \param [in] c unsigned long type of value stored in c
  656. * \return value stored in unsigned long type
  657. */
  658. __STATIC_FORCEINLINE unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c)
  659. {
  660. unsigned long picked; /* receives Rd, where each bit is c[x] ? a[x] : b[x] */
  661. __ASM volatile("bpick %0, %1, %2, %3" : "=r"(picked) : "r"(a), "r"(b), "r"(c));
  662. return picked;
  663. }
  664. /* ===== Inline Function End for 3.7. BPICK ===== */
  665. /* ===== Inline Function Start for 3.8. CLROV ===== */
  666. /**
  667. * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
  668. * \brief CLROV (Clear OV flag)
  669. * \details
  670. * **Type**: DSP
  671. *
  672. * **Syntax**:\n
  673. * ~~~
  674. * CLROV # pseudo mnemonic
  675. * ~~~
  676. *
  677. * **Purpose**:\n
  678. * This pseudo instruction is an alias to `CSRRCI x0, ucode, 1` instruction.
  679. *
  680. *
  681. */
  682. __STATIC_FORCEINLINE void __RV_CLROV(void)
  683. { /* Clears the OV (overflow) flag; takes no operands and returns nothing. */
  684. __ASM volatile("clrov "); /* pseudo mnemonic, documented as an alias of `CSRRCI x0, ucode, 1` */
  685. }
  686. /* ===== Inline Function End for 3.8. CLROV ===== */
  687. /* ===== Inline Function Start for 3.9. CLRS8 ===== */
  688. /**
  689. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  690. * \brief CLRS8 (SIMD 8-bit Count Leading Redundant Sign)
  691. * \details
  692. * **Type**: SIMD
  693. *
  694. * **Syntax**:\n
  695. * ~~~
  696. * CLRS8 Rd, Rs1
  697. * ~~~
  698. *
  699. * **Purpose**:\n
  700. * Count the number of redundant sign bits of the 8-bit elements of a general register.
  701. *
  702. * **Description**:\n
  703. * Starting from the bits next to the sign bits of the 8-bit elements of Rs1, this instruction
  704. * counts the number of redundant sign bits and writes the result to the corresponding 8-bit elements
  705. * of Rd.
  706. *
  707. * **Operations**:\n
  708. * ~~~
  709. * snum[x] = Rs1.B[x];
  710. * cnt[x] = 0;
  711. * for (i = 6 to 0) {
  712. * if (snum[x](i) == snum[x](7)) {
  713. * cnt[x] = cnt[x] + 1;
  714. * } else {
  715. * break;
  716. * }
  717. * }
  718. * Rd.B[x] = cnt[x];
  719. * for RV32: x=3...0
  720. * for RV64: x=7...0
  721. * ~~~
  722. *
  723. * \param [in] a unsigned long type of value stored in a
  724. * \return value stored in unsigned long type
  725. */
  726. __STATIC_FORCEINLINE unsigned long __RV_CLRS8(unsigned long a)
  727. {
  728. unsigned long sign_counts; /* receives Rd: per-byte count of redundant sign bits below bit 7 of each lane */
  729. __ASM volatile("clrs8 %0, %1" : "=r"(sign_counts) : "r"(a));
  730. return sign_counts;
  731. }
  732. /* ===== Inline Function End for 3.9. CLRS8 ===== */
  733. /* ===== Inline Function Start for 3.10. CLRS16 ===== */
  734. /**
  735. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  736. * \brief CLRS16 (SIMD 16-bit Count Leading Redundant Sign)
  737. * \details
  738. * **Type**: SIMD
  739. *
  740. * **Syntax**:\n
  741. * ~~~
  742. * CLRS16 Rd, Rs1
  743. * ~~~
  744. *
  745. * **Purpose**:\n
  746. * Count the number of redundant sign bits of the 16-bit elements of a general register.
  747. *
  748. * **Description**:\n
  749. * Starting from the bits next to the sign bits of the 16-bit elements of Rs1, this
  750. * instruction counts the number of redundant sign bits and writes the result to the corresponding
  751. * 16-bit elements of Rd.
  752. *
  753. * **Operations**:\n
  754. * ~~~
  755. * snum[x] = Rs1.H[x];
  756. * cnt[x] = 0;
  757. * for (i = 14 to 0) {
  758. * if (snum[x](i) == snum[x](15)) {
  759. * cnt[x] = cnt[x] + 1;
  760. * } else {
  761. * break;
  762. * }
  763. * }
  764. * Rd.H[x] = cnt[x];
  765. * for RV32: x=1...0
  766. * for RV64: x=3...0
  767. * ~~~
  768. *
  769. * \param [in] a unsigned long type of value stored in a
  770. * \return value stored in unsigned long type
  771. */
  772. __STATIC_FORCEINLINE unsigned long __RV_CLRS16(unsigned long a)
  773. {
  774. unsigned long sign_counts; /* receives Rd: per-halfword count of redundant sign bits below bit 15 of each lane */
  775. __ASM volatile("clrs16 %0, %1" : "=r"(sign_counts) : "r"(a));
  776. return sign_counts;
  777. }
  778. /* ===== Inline Function End for 3.10. CLRS16 ===== */
  779. /* ===== Inline Function Start for 3.11. CLRS32 ===== */
  780. /**
  781. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  782. * \brief CLRS32 (SIMD 32-bit Count Leading Redundant Sign)
  783. * \details
  784. * **Type**: SIMD
  785. *
  786. * **Syntax**:\n
  787. * ~~~
  788. * CLRS32 Rd, Rs1
  789. * ~~~
  790. *
  791. * **Purpose**:\n
  792. * Count the number of redundant sign bits of the 32-bit elements of a general register.
  793. *
  794. * **Description**:\n
  795. * Starting from the bits next to the sign bits of the 32-bit elements of Rs1, this
  796. * instruction counts the number of redundant sign bits and writes the result to the corresponding 32-
  797. * bit elements of Rd.
  798. *
  799. * **Operations**:\n
  800. * ~~~
  801. * snum[x] = Rs1.W[x];
  802. * cnt[x] = 0;
  803. * for (i = 30 to 0) {
  804. * if (snum[x](i) == snum[x](31)) {
  805. * cnt[x] = cnt[x] + 1;
  806. * } else {
  807. * break;
  808. * }
  809. * }
  810. * Rd.W[x] = cnt[x];
  811. * for RV32: x=0
  812. * for RV64: x=1...0
  813. * ~~~
  814. *
  815. * \param [in] a unsigned long type of value stored in a
  816. * \return value stored in unsigned long type
  817. */
  818. __STATIC_FORCEINLINE unsigned long __RV_CLRS32(unsigned long a)
  819. {
  820. unsigned long result;
  821. __ASM volatile("clrs32 %0, %1" : "=r"(result) : "r"(a));
  822. return result;
  823. }
  824. /* ===== Inline Function End for 3.11. CLRS32 ===== */
  825. /* ===== Inline Function Start for 3.12. CLO8 ===== */
  826. /**
  827. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  828. * \brief CLO8 (SIMD 8-bit Count Leading One)
  829. * \details
  830. * **Type**: SIMD
  831. *
  832. * **Syntax**:\n
  833. * ~~~
  834. * CLO8 Rd, Rs1
  835. * ~~~
  836. *
  837. * **Purpose**:\n
  838. * Count the number of leading one bits of the 8-bit elements of a general register.
  839. *
  840. * **Description**:\n
  841. * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
  842. * counts the number of leading one bits and writes the results to the corresponding 8-bit elements of
  843. * Rd.
  844. *
  845. * **Operations**:\n
  846. * ~~~
  847. * snum[x] = Rs1.B[x];
  848. * cnt[x] = 0;
  849. * for (i = 7 to 0) {
  850. * if (snum[x](i) == 1) {
  851. * cnt[x] = cnt[x] + 1;
  852. * } else {
  853. * break;
  854. * }
  855. * }
  856. * Rd.B[x] = cnt[x];
  857. * for RV32: x=3...0
  858. * for RV64: x=7...0
  859. * ~~~
  860. *
  861. * \param [in] a unsigned long type of value stored in a
  862. * \return value stored in unsigned long type
  863. */
  864. __STATIC_FORCEINLINE unsigned long __RV_CLO8(unsigned long a)
  865. {
  866. unsigned long result;
  867. __ASM volatile("clo8 %0, %1" : "=r"(result) : "r"(a));
  868. return result;
  869. }
  870. /* ===== Inline Function End for 3.12. CLO8 ===== */
  871. /* ===== Inline Function Start for 3.13. CLO16 ===== */
  872. /**
  873. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  874. * \brief CLO16 (SIMD 16-bit Count Leading One)
  875. * \details
  876. * **Type**: SIMD
  877. *
  878. * **Syntax**:\n
  879. * ~~~
  880. * CLO16 Rd, Rs1
  881. * ~~~
  882. *
  883. * **Purpose**:\n
  884. * Count the number of leading one bits of the 16-bit elements of a general register.
  885. *
  886. * **Description**:\n
  887. * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
  888. * counts the number of leading one bits and writes the results to the corresponding 16-bit elements
  889. * of Rd.
  890. *
  891. * **Operations**:\n
  892. * ~~~
  893. * snum[x] = Rs1.H[x];
  894. * cnt[x] = 0;
  895. * for (i = 15 to 0) {
  896. * if (snum[x](i) == 1) {
  897. * cnt[x] = cnt[x] + 1;
  898. * } else {
  899. * break;
  900. * }
  901. * }
  902. * Rd.H[x] = cnt[x];
  903. * for RV32: x=1...0
  904. * for RV64: x=3...0
  905. * ~~~
  906. *
  907. * \param [in] a unsigned long type of value stored in a
  908. * \return value stored in unsigned long type
  909. */
  910. __STATIC_FORCEINLINE unsigned long __RV_CLO16(unsigned long a)
  911. {
  912. unsigned long result;
  913. __ASM volatile("clo16 %0, %1" : "=r"(result) : "r"(a));
  914. return result;
  915. }
  916. /* ===== Inline Function End for 3.13. CLO16 ===== */
  917. /* ===== Inline Function Start for 3.14. CLO32 ===== */
  918. /**
  919. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  920. * \brief CLO32 (SIMD 32-bit Count Leading One)
  921. * \details
  922. * **Type**: SIMD
  923. *
  924. * **Syntax**:\n
  925. * ~~~
  926. * CLO32 Rd, Rs1
  927. * ~~~
  928. *
  929. * **Purpose**:\n
  930. * Count the number of leading one bits of the 32-bit elements of a general register.
  931. *
  932. * **Description**:\n
  933. * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
  934. * counts the number of leading one bits and writes the results to the corresponding 32-bit elements
  935. * of Rd.
  936. *
  937. * **Operations**:\n
  938. * ~~~
  939. * snum[x] = Rs1.W[x];
  940. * cnt[x] = 0;
  941. * for (i = 31 to 0) {
  942. * if (snum[x](i) == 1) {
  943. * cnt[x] = cnt[x] + 1;
  944. * } else {
  945. * break;
  946. * }
  947. * }
  948. * Rd.W[x] = cnt[x];
  949. * for RV32: x=0
  950. * for RV64: x=1...0
  951. * ~~~
  952. *
  953. * \param [in] a unsigned long type of value stored in a
  954. * \return value stored in unsigned long type
  955. */
  956. __STATIC_FORCEINLINE unsigned long __RV_CLO32(unsigned long a)
  957. {
  958. unsigned long result;
  959. __ASM volatile("clo32 %0, %1" : "=r"(result) : "r"(a));
  960. return result;
  961. }
  962. /* ===== Inline Function End for 3.14. CLO32 ===== */
  963. /* ===== Inline Function Start for 3.15. CLZ8 ===== */
  964. /**
  965. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  966. * \brief CLZ8 (SIMD 8-bit Count Leading Zero)
  967. * \details
  968. * **Type**: SIMD
  969. *
  970. * **Syntax**:\n
  971. * ~~~
  972. * CLZ8 Rd, Rs1
  973. * ~~~
  974. *
  975. * **Purpose**:\n
  976. * Count the number of leading zero bits of the 8-bit elements of a general register.
  977. *
  978. * **Description**:\n
  979. * Starting from the most significant bits of the 8-bit elements of Rs1, this instruction
  980. * counts the number of leading zero bits and writes the results to the corresponding 8-bit elements of
  981. * Rd.
  982. *
  983. * **Operations**:\n
  984. * ~~~
  985. * snum[x] = Rs1.B[x];
  986. * cnt[x] = 0;
  987. * for (i = 7 to 0) {
  988. * if (snum[x](i) == 0) {
  989. * cnt[x] = cnt[x] + 1;
  990. * } else {
  991. * break;
  992. * }
  993. * }
  994. * Rd.B[x] = cnt[x];
  995. * for RV32: x=3...0
  996. * for RV64: x=7...0
  997. * ~~~
  998. *
  999. * \param [in] a unsigned long type of value stored in a
  1000. * \return value stored in unsigned long type
  1001. */
  1002. __STATIC_FORCEINLINE unsigned long __RV_CLZ8(unsigned long a)
  1003. {
  1004. unsigned long result;
  1005. __ASM volatile("clz8 %0, %1" : "=r"(result) : "r"(a));
  1006. return result;
  1007. }
  1008. /* ===== Inline Function End for 3.15. CLZ8 ===== */
  1009. /* ===== Inline Function Start for 3.16. CLZ16 ===== */
  1010. /**
  1011. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  1012. * \brief CLZ16 (SIMD 16-bit Count Leading Zero)
  1013. * \details
  1014. * **Type**: SIMD
  1015. *
  1016. * **Syntax**:\n
  1017. * ~~~
  1018. * CLZ16 Rd, Rs1
  1019. * ~~~
  1020. *
  1021. * **Purpose**:\n
  1022. * Count the number of leading zero bits of the 16-bit elements of a general register.
  1023. *
  1024. * **Description**:\n
  1025. * Starting from the most significant bits of the 16-bit elements of Rs1, this instruction
  1026. * counts the number of leading zero bits and writes the results to the corresponding 16-bit elements
  1027. * of Rd.
  1028. *
  1029. * **Operations**:\n
  1030. * ~~~
  1031. * snum[x] = Rs1.H[x];
  1032. * cnt[x] = 0;
  1033. * for (i = 15 to 0) {
  1034. * if (snum[x](i) == 0) {
  1035. * cnt[x] = cnt[x] + 1;
  1036. * } else {
  1037. * break;
  1038. * }
  1039. * }
  1040. * Rd.H[x] = cnt[x];
  1041. * for RV32: x=1...0
  1042. * for RV64: x=3...0
  1043. * ~~~
  1044. *
  1045. * \param [in] a unsigned long type of value stored in a
  1046. * \return value stored in unsigned long type
  1047. */
  1048. __STATIC_FORCEINLINE unsigned long __RV_CLZ16(unsigned long a)
  1049. {
  1050. unsigned long result;
  1051. __ASM volatile("clz16 %0, %1" : "=r"(result) : "r"(a));
  1052. return result;
  1053. }
  1054. /* ===== Inline Function End for 3.16. CLZ16 ===== */
  1055. /* ===== Inline Function Start for 3.17. CLZ32 ===== */
  1056. /**
  1057. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  1058. * \brief CLZ32 (SIMD 32-bit Count Leading Zero)
  1059. * \details
  1060. * **Type**: SIMD
  1061. *
  1062. * **Syntax**:\n
  1063. * ~~~
  1064. * CLZ32 Rd, Rs1
  1065. * ~~~
  1066. *
  1067. * **Purpose**:\n
  1068. * Count the number of leading zero bits of the 32-bit elements of a general register.
  1069. *
  1070. * **Description**:\n
  1071. * Starting from the most significant bits of the 32-bit elements of Rs1, this instruction
  1072. * counts the number of leading zero bits and writes the results to the corresponding 32-bit elements
  1073. * of Rd.
  1074. *
  1075. * **Operations**:\n
  1076. * ~~~
  1077. * snum[x] = Rs1.W[x];
  1078. * cnt[x] = 0;
  1079. * for (i = 31 to 0) {
  1080. * if (snum[x](i) == 0) {
  1081. * cnt[x] = cnt[x] + 1;
  1082. * } else {
  1083. * break;
  1084. * }
  1085. * }
  1086. * Rd.W[x] = cnt[x];
  1087. * for RV32: x=0
  1088. * for RV64: x=1...0
  1089. * ~~~
  1090. *
  1091. * \param [in] a unsigned long type of value stored in a
  1092. * \return value stored in unsigned long type
  1093. */
  1094. __STATIC_FORCEINLINE unsigned long __RV_CLZ32(unsigned long a)
  1095. {
  1096. unsigned long result;
  1097. __ASM volatile("clz32 %0, %1" : "=r"(result) : "r"(a));
  1098. return result;
  1099. }
  1100. /* ===== Inline Function End for 3.17. CLZ32 ===== */
  1101. /* ===== Inline Function Start for 3.18. CMPEQ8 ===== */
  1102. /**
  1103. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  1104. * \brief CMPEQ8 (SIMD 8-bit Integer Compare Equal)
  1105. * \details
  1106. * **Type**: SIMD
  1107. *
  1108. * **Syntax**:\n
  1109. * ~~~
  1110. * CMPEQ8 Rd, Rs1, Rs2
  1111. * ~~~
  1112. *
  1113. * **Purpose**:\n
  1114. * Do 8-bit integer elements equal comparisons simultaneously.
  1115. *
  1116. * **Description**:\n
  1117. * This instruction compares the 8-bit integer elements in Rs1 with the 8-bit integer
  1118. * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFF; otherwise, the result is
  1119. * 0x0. The 8-bit element comparison results are written to Rd.
  1120. *
  1121. * **Note**:\n
  1122. * This instruction can be used for either signed or unsigned numbers.
  1123. *
  1124. * **Operations**:\n
  1125. * ~~~
  1126. * Rd.B[x] = (Rs1.B[x] == Rs2.B[x])? 0xff : 0x0;
  1127. * for RV32: x=3...0,
  1128. * for RV64: x=7...0
  1129. * ~~~
  1130. *
  1131. * \param [in] a unsigned long type of value stored in a
  1132. * \param [in] b unsigned long type of value stored in b
  1133. * \return value stored in unsigned long type
  1134. */
  1135. __STATIC_FORCEINLINE unsigned long __RV_CMPEQ8(unsigned long a, unsigned long b)
  1136. {
  1137. unsigned long result;
  1138. __ASM volatile("cmpeq8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1139. return result;
  1140. }
  1141. /* ===== Inline Function End for 3.18. CMPEQ8 ===== */
  1142. /* ===== Inline Function Start for 3.19. CMPEQ16 ===== */
  1143. /**
  1144. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  1145. * \brief CMPEQ16 (SIMD 16-bit Integer Compare Equal)
  1146. * \details
  1147. * **Type**: SIMD
  1148. *
  1149. * **Syntax**:\n
  1150. * ~~~
  1151. * CMPEQ16 Rd, Rs1, Rs2
  1152. * ~~~
  1153. *
  1154. * **Purpose**:\n
  1155. * Do 16-bit integer elements equal comparisons simultaneously.
  1156. *
  1157. * **Description**:\n
  1158. * This instruction compares the 16-bit integer elements in Rs1 with the 16-bit integer
  1159. * elements in Rs2 to see if they are equal. If they are equal, the result is 0xFFFF; otherwise, the result
  1160. * is 0x0. The 16-bit element comparison results are written to Rd.
  1161. *
  1162. * **Note**:\n
  1163. * This instruction can be used for either signed or unsigned numbers.
  1164. *
  1165. * **Operations**:\n
  1166. * ~~~
  1167. * Rd.H[x] = (Rs1.H[x] == Rs2.H[x])? 0xffff : 0x0;
  1168. * for RV32: x=1...0,
  1169. * for RV64: x=3...0
  1170. * ~~~
  1171. *
  1172. * \param [in] a unsigned long type of value stored in a
  1173. * \param [in] b unsigned long type of value stored in b
  1174. * \return value stored in unsigned long type
  1175. */
  1176. __STATIC_FORCEINLINE unsigned long __RV_CMPEQ16(unsigned long a, unsigned long b)
  1177. {
  1178. unsigned long result;
  1179. __ASM volatile("cmpeq16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1180. return result;
  1181. }
  1182. /* ===== Inline Function End for 3.19. CMPEQ16 ===== */
  1183. /* ===== Inline Function Start for 3.20. CRAS16 ===== */
  1184. /**
  1185. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1186. * \brief CRAS16 (SIMD 16-bit Cross Addition & Subtraction)
  1187. * \details
  1188. * **Type**: SIMD
  1189. *
  1190. * **Syntax**:\n
  1191. * ~~~
  1192. * CRAS16 Rd, Rs1, Rs2
  1193. * ~~~
  1194. *
  1195. * **Purpose**:\n
  1196. * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
  1197. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  1198. *
  1199. * **Description**:\n
  1200. * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
  1201. * the 16-bit integer element in [15:0] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
  1202. * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [31:16] of 32-bit chunks in
  1203. * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
  1204. * bit chunks in Rd.
  1205. *
  1206. * **Note**:\n
  1207. * This instruction can be used for either signed or unsigned operations.
  1208. *
  1209. * **Operations**:\n
  1210. * ~~~
  1211. * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  1212. * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  1213. * for RV32, x=0
  1214. * for RV64, x=1...0
  1215. * ~~~
  1216. *
  1217. * \param [in] a unsigned long type of value stored in a
  1218. * \param [in] b unsigned long type of value stored in b
  1219. * \return value stored in unsigned long type
  1220. */
  1221. __STATIC_FORCEINLINE unsigned long __RV_CRAS16(unsigned long a, unsigned long b)
  1222. {
  1223. unsigned long result;
  1224. __ASM volatile("cras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1225. return result;
  1226. }
  1227. /* ===== Inline Function End for 3.20. CRAS16 ===== */
  1228. /* ===== Inline Function Start for 3.21. CRSA16 ===== */
  1229. /**
  1230. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1231. * \brief CRSA16 (SIMD 16-bit Cross Subtraction & Addition)
  1232. * \details
  1233. * **Type**: SIMD
  1234. *
  1235. * **Syntax**:\n
  1236. * ~~~
  1237. * CRSA16 Rd, Rs1, Rs2
  1238. * ~~~
  1239. *
  1240. * **Purpose**:\n
  1241. * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
  1242. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  1243. *
  1244. * **Description**:\n
  1245. * This instruction subtracts the 16-bit integer element in [15:0] of 32-bit chunks in Rs2
  1246. * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
  1247. * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [31:16] of 32-bit chunks
  1248. * in Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to
  1249. * [15:0] of 32-bit chunks in Rd.
  1250. *
  1251. * **Note**:\n
  1252. * This instruction can be used for either signed or unsigned operations.
  1253. *
  1254. * **Operations**:\n
  1255. * ~~~
  1256. * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  1257. * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  1258. * for RV32, x=0
  1259. * for RV64, x=1...0
  1260. * ~~~
  1261. *
  1262. * \param [in] a unsigned long type of value stored in a
  1263. * \param [in] b unsigned long type of value stored in b
  1264. * \return value stored in unsigned long type
  1265. */
  1266. __STATIC_FORCEINLINE unsigned long __RV_CRSA16(unsigned long a, unsigned long b)
  1267. {
  1268. unsigned long result;
  1269. __ASM volatile("crsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1270. return result;
  1271. }
  1272. /* ===== Inline Function End for 3.21. CRSA16 ===== */
  1273. /* ===== Inline Function Start for 3.22. INSB ===== */
  1274. /**
  1275. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  1276. * \brief INSB (Insert Byte)
  1277. * \details
  1278. * **Type**: DSP
  1279. *
  1280. * **Syntax**:\n
  1281. * ~~~
  1282. * (RV32) INSB Rd, Rs1, imm[1:0]
  1283. * (RV64) INSB Rd, Rs1, imm[2:0]
  1284. * ~~~
  1285. *
  1286. * **Purpose**:\n
  1287. * Insert byte 0 of a 32-bit or 64-bit register into one of the byte elements of another register.
  1288. *
  1289. * **Description**:\n
  1290. * This instruction inserts byte 0 of Rs1 into byte `imm[1:0]` (RV32) or `imm[2:0]` (RV64)
  1291. * of Rd.
  1292. *
  1293. * **Operations**:\n
  1294. * ~~~
  1295. * bpos = imm[1:0]; (RV32)
  1296. * bpos = imm[2:0]; (RV64)
  1297. * Rd.B[bpos] = Rs1.B[0]
  1298. * ~~~
  1299. *
  1300. * \param [in] t unsigned long type of value stored in t
  1301. * \param [in] a unsigned long type of value stored in a
  1302. * \param [in] b immediate byte position: imm[1:0] (RV32) or imm[2:0] (RV64); must be a compile-time constant
  1303. * \return value stored in unsigned long type
  1304. */
  #define __RV_INSB(t, a, b)                                              \
      ({                                                                  \
          /* __t is a read-write operand ("+r"): it carries t in and */   \
          /* the updated register value out.                         */   \
          unsigned long __t = (unsigned long)(t);                         \
          unsigned long __a = (unsigned long)(a);                         \
          /* b is passed through the "K" immediate constraint. */         \
          __ASM volatile("insb %0, %1, %2"                                \
                         : "+r"(__t)                                      \
                         : "r"(__a), "K"(b));                             \
          __t;                                                            \
      })
  1312. /* ===== Inline Function End for 3.22. INSB ===== */
  1313. /* ===== Inline Function Start for 3.23. KABS8 ===== */
  1314. /**
  1315. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  1316. * \brief KABS8 (SIMD 8-bit Saturating Absolute)
  1317. * \details
  1318. * **Type**: SIMD
  1319. *
  1320. * **Syntax**:\n
  1321. * ~~~
  1322. * KABS8 Rd, Rs1
  1323. * ~~~
  1324. *
  1325. * **Purpose**:\n
  1326. * Get the absolute value of 8-bit signed integer elements simultaneously.
  1327. *
  1328. * **Description**:\n
  1329. * This instruction calculates the absolute value of 8-bit signed integer elements stored
  1330. * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
  1331. * 0x7f as the output and sets the OV bit to 1.
  1332. *
  1333. * **Operations**:\n
  1334. * ~~~
  1335. * src = Rs1.B[x];
  1336. * if (src == 0x80) {
  1337. * src = 0x7f;
  1338. * OV = 1;
  1339. * } else if (src[7] == 1) {
  1340. * src = -src;
  1341. * }
  1342. * Rd.B[x] = src;
  1343. * for RV32: x=3...0,
  1344. * for RV64: x=7...0
  1345. * ~~~
  1346. *
  1347. * \param [in] a unsigned long type of value stored in a
  1348. * \return value stored in unsigned long type
  1349. */
  1350. __STATIC_FORCEINLINE unsigned long __RV_KABS8(unsigned long a)
  1351. {
  1352. unsigned long result;
  1353. __ASM volatile("kabs8 %0, %1" : "=r"(result) : "r"(a));
  1354. return result;
  1355. }
  1356. /* ===== Inline Function End for 3.23. KABS8 ===== */
  1357. /* ===== Inline Function Start for 3.24. KABS16 ===== */
  1358. /**
  1359. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  1360. * \brief KABS16 (SIMD 16-bit Saturating Absolute)
  1361. * \details
  1362. * **Type**: SIMD
  1363. *
  1364. * **Syntax**:\n
  1365. * ~~~
  1366. * KABS16 Rd, Rs1
  1367. * ~~~
  1368. *
  1369. * **Purpose**:\n
  1370. * Get the absolute value of 16-bit signed integer elements simultaneously.
  1371. *
  1372. * **Description**:\n
  1373. * This instruction calculates the absolute value of 16-bit signed integer elements stored
  1374. * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
  1375. * generates 0x7fff as the output and sets the OV bit to 1.
  1376. *
  1377. * **Operations**:\n
  1378. * ~~~
  1379. * src = Rs1.H[x];
  1380. * if (src == 0x8000) {
  1381. * src = 0x7fff;
  1382. * OV = 1;
  1383. * } else if (src[15] == 1) {
  1384. * src = -src;
  1385. * }
  1386. * Rd.H[x] = src;
  1387. * for RV32: x=1...0,
  1388. * for RV64: x=3...0
  1389. * ~~~
  1390. *
  1391. * \param [in] a unsigned long type of value stored in a
  1392. * \return value stored in unsigned long type
  1393. */
  1394. __STATIC_FORCEINLINE unsigned long __RV_KABS16(unsigned long a)
  1395. {
  1396. unsigned long result;
  1397. __ASM volatile("kabs16 %0, %1" : "=r"(result) : "r"(a));
  1398. return result;
  1399. }
  1400. /* ===== Inline Function End for 3.24. KABS16 ===== */
  1401. /* ===== Inline Function Start for 3.25. KABSW ===== */
  1402. /**
  1403. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1404. * \brief KABSW (Scalar 32-bit Absolute Value with Saturation)
  1405. * \details
  1406. * **Type**: DSP
  1407. *
  1408. * **Syntax**:\n
  1409. * ~~~
  1410. * KABSW Rd, Rs1
  1411. * ~~~
  1412. *
  1413. * **Purpose**:\n
  1414. * Get the absolute value of a signed 32-bit integer in a general register.
  1415. *
  1416. * **Description**:\n
  1417. * This instruction calculates the absolute value of a signed 32-bit integer stored in Rs1.
  1418. * The result is sign-extended (for RV64) and written to Rd. This instruction with the minimum
  1419. * negative integer input of 0x80000000 will produce a saturated output of maximum positive integer
  1420. * of 0x7fffffff and the OV flag will be set to 1.
  1421. *
  1422. * **Operations**:\n
  1423. * ~~~
  1424. * if (Rs1.W[0] >= 0) {
  1425. * res = Rs1.W[0];
  1426. * } else {
  1427. * if (Rs1.W[0] == 0x80000000) {
  1428. * res = 0x7fffffff;
  1429. * OV = 1;
  1430. * } else {
  1431. * res = -Rs1.W[0];
  1432. * }
  1433. * }
  1434. * Rd = SE32(res);
  1435. * ~~~
  1436. *
  1437. * \param [in] a signed long type of value stored in a
  1438. * \return value stored in unsigned long type
  1439. */
  1440. __STATIC_FORCEINLINE unsigned long __RV_KABSW(signed long a)
  1441. {
  1442. unsigned long result;
  1443. __ASM volatile("kabsw %0, %1" : "=r"(result) : "r"(a));
  1444. return result;
  1445. }
  1446. /* ===== Inline Function End for 3.25. KABSW ===== */
  1447. /* ===== Inline Function Start for 3.26. KADD8 ===== */
  1448. /**
  1449. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  1450. * \brief KADD8 (SIMD 8-bit Signed Saturating Addition)
  1451. * \details
  1452. * **Type**: SIMD
  1453. *
  1454. * **Syntax**:\n
  1455. * ~~~
  1456. * KADD8 Rd, Rs1, Rs2
  1457. * ~~~
  1458. *
  1459. * **Purpose**:\n
  1460. * Do 8-bit signed integer element saturating additions simultaneously.
  1461. *
  1462. * **Description**:\n
  1463. * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
  1464. * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
  1465. * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  1466. *
  1467. * **Operations**:\n
  1468. * ~~~
  1469. * res[x] = Rs1.B[x] + Rs2.B[x];
  1470. * if (res[x] > 127) {
  1471. * res[x] = 127;
  1472. * OV = 1;
  1473. * } else if (res[x] < -128) {
  1474. * res[x] = -128;
  1475. * OV = 1;
  1476. * }
  1477. * Rd.B[x] = res[x];
  1478. * for RV32: x=3...0,
  1479. * for RV64: x=7...0
  1480. * ~~~
  1481. *
  1482. * \param [in] a unsigned long type of value stored in a
  1483. * \param [in] b unsigned long type of value stored in b
  1484. * \return value stored in unsigned long type
  1485. */
  1486. __STATIC_FORCEINLINE unsigned long __RV_KADD8(unsigned long a, unsigned long b)
  1487. {
  1488. unsigned long result;
  1489. __ASM volatile("kadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1490. return result;
  1491. }
  1492. /* ===== Inline Function End for 3.26. KADD8 ===== */
  1493. /* ===== Inline Function Start for 3.27. KADD16 ===== */
  1494. /**
  1495. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1496. * \brief KADD16 (SIMD 16-bit Signed Saturating Addition)
  1497. * \details
  1498. * **Type**: SIMD
  1499. *
  1500. * **Syntax**:\n
  1501. * ~~~
  1502. * KADD16 Rd, Rs1, Rs2
  1503. * ~~~
  1504. *
  1505. * **Purpose**:\n
  1506. * Do 16-bit signed integer element saturating additions simultaneously.
  1507. *
  1508. * **Description**:\n
  1509. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
  1510. * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
  1511. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  1512. *
  1513. * **Operations**:\n
  1514. * ~~~
  1515. * res[x] = Rs1.H[x] + Rs2.H[x];
  1516. * if (res[x] > 32767) {
  1517. * res[x] = 32767;
  1518. * OV = 1;
  1519. * } else if (res[x] < -32768) {
  1520. * res[x] = -32768;
  1521. * OV = 1;
  1522. * }
  1523. * Rd.H[x] = res[x];
  1524. * for RV32: x=1...0,
  1525. * for RV64: x=3...0
  1526. * ~~~
  1527. *
  1528. * \param [in] a unsigned long type of value stored in a
  1529. * \param [in] b unsigned long type of value stored in b
  1530. * \return value stored in unsigned long type
  1531. */
  1532. __STATIC_FORCEINLINE unsigned long __RV_KADD16(unsigned long a, unsigned long b)
  1533. {
  1534. unsigned long result;
  1535. __ASM volatile("kadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1536. return result;
  1537. }
  1538. /* ===== Inline Function End for 3.27. KADD16 ===== */
  1539. /* ===== Inline Function Start for 3.28. KADD64 ===== */
  1540. /**
  1541. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  1542. * \brief KADD64 (64-bit Signed Saturating Addition)
  1543. * \details
  1544. * **Type**: DSP (64-bit Profile)
  1545. *
  1546. * **Syntax**:\n
  1547. * ~~~
  1548. * KADD64 Rd, Rs1, Rs2
  1549. * ~~~
  1550. *
  1551. * **Purpose**:\n
  1552. * Add two 64-bit signed integers. The result is saturated to the Q63 range.
  1553. *
  1554. * **RV32 Description**:\n
  1555. * This instruction adds the 64-bit signed integer of an even/odd pair of registers
  1556. * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
  1557. * Rs2(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
  1558. * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
  1559. * specified by Rd(4,1).
  1560. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  1561. * pair includes register 2d and 2d+1.
  1562. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  1563. * of the pair contains the low 32-bit of the result.
  1564. *
  1565. * **RV64 Description**:\n
  1566. * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
  1567. * integer in Rs2. If the result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the
  1568. * range and the OV bit is set to 1. The saturated result is written to Rd.
  1569. *
  1570. * **Operations**:\n
  1571. * ~~~
  1572. * RV32:
  1573. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  1574. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  1575. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  1576. * result = R[a_H].R[a_L] + R[b_H].R[b_L];
  1577. * if (result > (2^63)-1) {
  1578. * result = (2^63)-1; OV = 1;
  1579. * } else if (result < -2^63) {
  1580. * result = -2^63; OV = 1;
  1581. * }
  1582. * R[t_H].R[t_L] = result;
  1583. * RV64:
  1584. * result = Rs1 + Rs2;
  1585. * if (result > (2^63)-1) {
  1586. * result = (2^63)-1; OV = 1;
  1587. * } else if (result < -2^63) {
  1588. * result = -2^63; OV = 1;
  1589. * }
  1590. * Rd = result;
  1591. * ~~~
  1592. *
  1593. * \param [in] a long long type of value stored in a
  1594. * \param [in] b long long type of value stored in b
  1595. * \return value stored in long long type
  1596. */
  1597. __STATIC_FORCEINLINE long long __RV_KADD64(long long a, long long b)
  1598. {
  1599. long long result;
  1600. __ASM volatile("kadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1601. return result;
  1602. }
  1603. /* ===== Inline Function End for 3.28. KADD64 ===== */
  1604. /* ===== Inline Function Start for 3.29. KADDH ===== */
  1605. /**
  1606. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  1607. * \brief KADDH (Signed Addition with Q15 Saturation)
  1608. * \details
  1609. * **Type**: DSP
  1610. *
  1611. * **Syntax**:\n
  1612. * ~~~
  1613. * KADDH Rd, Rs1, Rs2
  1614. * ~~~
  1615. *
  1616. * **Purpose**:\n
  1617. * Add the signed lower 32-bit content of two registers with Q15 saturation.
  1618. *
  1619. * **Description**:\n
  1620. * The signed lower 32-bit content of Rs1 is added with the signed lower 32-bit content of
  1621. * Rs2. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then sign-
  1622. * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  1623. *
  1624. * **Operations**:\n
  1625. * ~~~
  1626. * tmp = Rs1.W[0] + Rs2.W[0];
  1627. * if (tmp > 32767) {
  1628. * res = 32767;
  1629. * OV = 1;
  1630. * } else if (tmp < -32768) {
  1631. * res = -32768;
  1632. * OV = 1;
  1633. * } else {
  1634. * res = tmp;
  1635. * }
  1636. * Rd = SE(res[15:0]);
  1637. * ~~~
  1638. *
  1639. * \param [in] a int type of value stored in a
  1640. * \param [in] b int type of value stored in b
  1641. * \return value stored in long type
  1642. */
  1643. __STATIC_FORCEINLINE long __RV_KADDH(int a, int b)
  1644. {
  1645. long result;
  1646. __ASM volatile("kaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  1647. return result;
  1648. }
  1649. /* ===== Inline Function End for 3.29. KADDH ===== */
  1650. /* ===== Inline Function Start for 3.30. KADDW ===== */
  1651. /**
  1652. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1653. * \brief KADDW (Signed Addition with Q31 Saturation)
  1654. * \details
  1655. * **Type**: DSP
  1656. *
  1657. * **Syntax**:\n
  1658. * ~~~
  1659. * KADDW Rd, Rs1, Rs2
  1660. * ~~~
  1661. *
  1662. * **Purpose**:\n
  1663. * Add the lower 32-bit signed content of two registers with Q31 saturation.
  1664. *
  1665. * **Description**:\n
  1666. * The lower 32-bit signed content of Rs1 is added with the lower 32-bit signed content of
  1667. * Rs2. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then sign-
  1668. * extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  1669. *
  1670. * **Operations**:\n
  1671. * ~~~
  1672. * tmp = Rs1.W[0] + Rs2.W[0];
  1673. * if (tmp > (2^31)-1) {
  1674. * res = (2^31)-1;
  1675. * OV = 1;
  1676. * } else if (tmp < -2^31) {
  1677. * res = -2^31;
  1678. * OV = 1;
  1679. * } else {
  1680. * res = tmp;
  1681. * }
  1682. * Rd = res[31:0]; // RV32
  1683. * Rd = SE(res[31:0]) // RV64
  1684. * ~~~
  1685. *
  1686. * \param [in] a int type of value stored in a
  1687. * \param [in] b int type of value stored in b
  1688. * \return value stored in long type
  1689. */
  1690. __STATIC_FORCEINLINE long __RV_KADDW(int a, int b)
  1691. { /* Thin wrapper: issues one `kaddw` instruction on a and b and returns what it writes to its destination. */
  1692. long result; /* bound to asm operand %0 (the destination register) */
  1693. __ASM volatile("kaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  1694. return result; /* value left in the destination register by kaddw */
  1695. }
  1696. /* ===== Inline Function End for 3.30. KADDW ===== */
  1697. /* ===== Inline Function Start for 3.31. KCRAS16 ===== */
  1698. /**
  1699. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1700. * \brief KCRAS16 (SIMD 16-bit Signed Saturating Cross Addition & Subtraction)
  1701. * \details
  1702. * **Type**: SIMD
  1703. *
  1704. * **Syntax**:\n
  1705. * ~~~
  1706. * KCRAS16 Rd, Rs1, Rs2
  1707. * ~~~
  1708. *
  1709. * **Purpose**:\n
  1710. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
  1711. * saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-
  1712. * bit chunks.
  1713. *
  1714. * **Description**:\n
  1715. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  1716. * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
  1717. * subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed
  1718. * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
  1719. * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
  1720. * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
  1721. * subtraction.
  1722. *
  1723. * **Operations**:\n
  1724. * ~~~
  1725. * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  1726. * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  1727. * for (res in [res1, res2]) {
  1728. * if (res > (2^15)-1) {
  1729. * res = (2^15)-1;
  1730. * OV = 1;
  1731. * } else if (res < -2^15) {
  1732. * res = -2^15;
  1733. * OV = 1;
  1734. * }
  1735. * }
  1736. * Rd.W[x][31:16] = res1;
  1737. * Rd.W[x][15:0] = res2;
  1738. * for RV32, x=0
  1739. * for RV64, x=1...0
  1740. * ~~~
  1741. *
  1742. * \param [in] a unsigned long type of value stored in a
  1743. * \param [in] b unsigned long type of value stored in b
  1744. * \return value stored in unsigned long type
  1745. */
  1746. __STATIC_FORCEINLINE unsigned long __RV_KCRAS16(unsigned long a, unsigned long b)
  1747. { /* Thin wrapper: issues one `kcras16` instruction on the packed operands a and b and returns the packed result. */
  1748. unsigned long result; /* bound to asm operand %0 (the destination register) */
  1749. __ASM volatile("kcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  1750. return result; /* value left in the destination register by kcras16 */
  1751. }
  1752. /* ===== Inline Function End for 3.31. KCRAS16 ===== */
  1753. /* ===== Inline Function Start for 3.32. KCRSA16 ===== */
  1754. /**
  1755. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  1756. * \brief KCRSA16 (SIMD 16-bit Signed Saturating Cross Subtraction & Addition)
  1757. * \details
  1758. * **Type**: SIMD
  1759. *
  1760. * **Syntax**:\n
  1761. * ~~~
  1762. * KCRSA16 Rd, Rs1, Rs2
  1763. * ~~~
  1764. *
  1765. * **Purpose**:\n
  1766. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
  1767. * saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit
  1768. * chunks.
  1769. *
  1770. * **Description**:\n
  1771. * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
  1772. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
  1773. * adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed
  1774. * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
  1775. * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
  1776. * results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd
  1777. * for addition.
  1778. *
  1779. * **Operations**:\n
  1780. * ~~~
  1781. * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  1782. * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  1783. * for (res in [res1, res2]) {
  1784. * if (res > (2^15)-1) {
  1785. * res = (2^15)-1;
  1786. * OV = 1;
  1787. * } else if (res < -2^15) {
  1788. * res = -2^15;
  1789. * OV = 1;
  1790. * }
  1791. * }
  1792. * Rd.W[x][31:16] = res1;
  1793. * Rd.W[x][15:0] = res2;
  1794. * for RV32, x=0
  1795. * for RV64, x=1...0
  1796. * ~~~
  1797. *
  1798. * \param [in] a unsigned long type of value stored in a
  1799. * \param [in] b unsigned long type of value stored in b
  1800. * \return value stored in unsigned long type
  1801. */
  1802. __STATIC_FORCEINLINE unsigned long __RV_KCRSA16(unsigned long a, unsigned long b)
  1803. { /* Thin wrapper: issues one `kcrsa16` instruction on the packed operands a and b and returns the packed result. */
  1804. unsigned long result; /* bound to asm operand %0 (the destination register) */
  1805. __ASM volatile("kcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  1806. return result; /* value left in the destination register by kcrsa16 */
  1807. }
  1808. /* ===== Inline Function End for 3.32. KCRSA16 ===== */
  1809. /* ===== Inline Function Start for 3.33.1. KDMBB ===== */
  1810. /**
  1811. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1812. * \brief KDMBB (Signed Saturating Double Multiply B16 x B16)
  1813. * \details
  1814. * **Type**: DSP
  1815. *
  1816. * **Syntax**:\n
  1817. * ~~~
  1818. * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1819. * ~~~
  1820. *
  1821. * **Purpose**:\n
  1822. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1823. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
  1824. * written into the destination register for RV32 or sign-extended to 64-bits and written into the
  1825. * destination register for RV64. If saturation happens, an overflow flag OV will be set.
  1826. *
  1827. * **Description**:\n
  1828. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1829. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1830. * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
  1831. * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
  1832. * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
  1833. *
  1834. * **Operations**:\n
  1835. * ~~~
  1836. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
  1837. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
  1838. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
  1839. * If (0x8000 != aop | 0x8000 != bop) {
  1840. * Mresult = aop * bop;
  1841. * resQ31 = Mresult << 1;
  1842. * Rd = resQ31; // RV32
  1843. * Rd = SE(resQ31); // RV64
  1844. * } else {
  1845. * resQ31 = 0x7FFFFFFF;
  1846. * Rd = resQ31; // RV32
  1847. * Rd = SE(resQ31); // RV64
  1848. * OV = 1;
  1849. * }
  1850. * ~~~
  1851. *
  1852. * \param [in] a unsigned int type of value stored in a
  1853. * \param [in] b unsigned int type of value stored in b
  1854. * \return value stored in long type
  1855. */
  1856. __STATIC_FORCEINLINE long __RV_KDMBB(unsigned int a, unsigned int b)
  1857. { /* Thin wrapper: issues one `kdmbb` instruction on a and b and returns what it writes to its destination. */
  1858. long result; /* bound to asm operand %0 (the destination register) */
  1859. __ASM volatile("kdmbb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  1860. return result; /* value left in the destination register by kdmbb */
  1861. }
  1862. /* ===== Inline Function End for 3.33.1. KDMBB ===== */
  1863. /* ===== Inline Function Start for 3.33.2. KDMBT ===== */
  1864. /**
  1865. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1866. * \brief KDMBT (Signed Saturating Double Multiply B16 x T16)
  1867. * \details
  1868. * **Type**: DSP
  1869. *
  1870. * **Syntax**:\n
  1871. * ~~~
  1872. * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1873. * ~~~
  1874. *
  1875. * **Purpose**:\n
  1876. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1877. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
  1878. * written into the destination register for RV32 or sign-extended to 64-bits and written into the
  1879. * destination register for RV64. If saturation happens, an overflow flag OV will be set.
  1880. *
  1881. * **Description**:\n
  1882. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1883. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1884. * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
  1885. * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
  1886. * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
  1887. *
  1888. * **Operations**:\n
  1889. * ~~~
  1890. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
  1891. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
  1892. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
  1893. * If (0x8000 != aop | 0x8000 != bop) {
  1894. * Mresult = aop * bop;
  1895. * resQ31 = Mresult << 1;
  1896. * Rd = resQ31; // RV32
  1897. * Rd = SE(resQ31); // RV64
  1898. * } else {
  1899. * resQ31 = 0x7FFFFFFF;
  1900. * Rd = resQ31; // RV32
  1901. * Rd = SE(resQ31); // RV64
  1902. * OV = 1;
  1903. * }
  1904. * ~~~
  1905. *
  1906. * \param [in] a unsigned int type of value stored in a
  1907. * \param [in] b unsigned int type of value stored in b
  1908. * \return value stored in long type
  1909. */
  1910. __STATIC_FORCEINLINE long __RV_KDMBT(unsigned int a, unsigned int b)
  1911. { /* Thin wrapper: issues one `kdmbt` instruction on a and b and returns what it writes to its destination. */
  1912. long result; /* bound to asm operand %0 (the destination register) */
  1913. __ASM volatile("kdmbt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  1914. return result; /* value left in the destination register by kdmbt */
  1915. }
  1916. /* ===== Inline Function End for 3.33.2. KDMBT ===== */
  1917. /* ===== Inline Function Start for 3.33.3. KDMTT ===== */
  1918. /**
  1919. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1920. * \brief KDMTT (Signed Saturating Double Multiply T16 x T16)
  1921. * \details
  1922. * **Type**: DSP
  1923. *
  1924. * **Syntax**:\n
  1925. * ~~~
  1926. * KDMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1927. * ~~~
  1928. *
  1929. * **Purpose**:\n
  1930. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1931. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result. The result is
  1932. * written into the destination register for RV32 or sign-extended to 64-bits and written into the
  1933. * destination register for RV64. If saturation happens, an overflow flag OV will be set.
  1934. *
  1935. * **Description**:\n
  1936. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1937. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1938. * doubled and saturated into a Q31 value. The Q31 value is then written into Rd (sign-extended in
  1939. * RV64). When both the two Q15 inputs are 0x8000, saturation will happen. The result will be
  1940. * saturated to 0x7FFFFFFF and the overflow flag OV will be set.
  1941. *
  1942. * **Operations**:\n
  1943. * ~~~
  1944. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMBB
  1945. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMBT
  1946. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMTT
  1947. * If (0x8000 != aop | 0x8000 != bop) {
  1948. * Mresult = aop * bop;
  1949. * resQ31 = Mresult << 1;
  1950. * Rd = resQ31; // RV32
  1951. * Rd = SE(resQ31); // RV64
  1952. * } else {
  1953. * resQ31 = 0x7FFFFFFF;
  1954. * Rd = resQ31; // RV32
  1955. * Rd = SE(resQ31); // RV64
  1956. * OV = 1;
  1957. * }
  1958. * ~~~
  1959. *
  1960. * \param [in] a unsigned int type of value stored in a
  1961. * \param [in] b unsigned int type of value stored in b
  1962. * \return value stored in long type
  1963. */
  1964. __STATIC_FORCEINLINE long __RV_KDMTT(unsigned int a, unsigned int b)
  1965. { /* Thin wrapper: issues one `kdmtt` instruction on a and b and returns what it writes to its destination. */
  1966. long result; /* bound to asm operand %0 (the destination register) */
  1967. __ASM volatile("kdmtt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  1968. return result; /* value left in the destination register by kdmtt */
  1969. }
  1970. /* ===== Inline Function End for 3.33.3. KDMTT ===== */
  1971. /* ===== Inline Function Start for 3.34.1. KDMABB ===== */
  1972. /**
  1973. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  1974. * \brief KDMABB (Signed Saturating Double Multiply Addition B16 x B16)
  1975. * \details
  1976. * **Type**: DSP
  1977. *
  1978. * **Syntax**:\n
  1979. * ~~~
  1980. * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  1981. * ~~~
  1982. *
  1983. * **Purpose**:\n
  1984. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  1985. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
  1986. * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
  1987. * result into the destination register. If saturation happens, an overflow flag OV will be set.
  1988. *
  1989. * **Description**:\n
  1990. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  1991. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  1992. * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
  1993. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  1994. * the OV flag is set to 1. The result after saturation is written to Rd.
  1995. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  1996. * set.
  1997. *
  1998. * **Operations**:\n
  1999. * ~~~
  2000. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
  2001. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
  2002. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
  2003. * If (0x8000 != aop | 0x8000 != bop) {
  2004. * Mresult = aop * bop;
  2005. * resQ31 = Mresult << 1;
  2006. * } else {
  2007. * resQ31 = 0x7FFFFFFF;
  2008. * OV = 1;
  2009. * }
  2010. * resadd = Rd + resQ31; // RV32
  2011. * resadd = Rd.W[0] + resQ31; // RV64
  2012. * if (resadd > (2^31)-1) {
  2013. * resadd = (2^31)-1;
  2014. * OV = 1;
  2015. * } else if (resadd < -2^31) {
  2016. * resadd = -2^31;
  2017. * OV = 1;
  2018. * }
  2019. * Rd = resadd; // RV32
  2020. * Rd = SE(resadd); // RV64
  2021. * ~~~
  2022. *
  2023. * \param [in] t long type of value stored in t
  2024. * \param [in] a unsigned int type of value stored in a
  2025. * \param [in] b unsigned int type of value stored in b
  2026. * \return value stored in long type
  2027. */
  2028. __STATIC_FORCEINLINE long __RV_KDMABB(long t, unsigned int a, unsigned int b)
  2029. { /* Thin wrapper: issues one `kdmabb` instruction; t is both the incoming destination value and the place the result is written. */
  2030. __ASM volatile("kdmabb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* %0 = t (read-write, "+r"), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  2031. return t; /* t as updated in place by kdmabb */
  2032. }
  2033. /* ===== Inline Function End for 3.34.1. KDMABB ===== */
  2034. /* ===== Inline Function Start for 3.34.2. KDMABT ===== */
  2035. /**
  2036. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  2037. * \brief KDMABT (Signed Saturating Double Multiply Addition B16 x T16)
  2038. * \details
  2039. * **Type**: DSP
  2040. *
  2041. * **Syntax**:\n
  2042. * ~~~
  2043. * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2044. * ~~~
  2045. *
  2046. * **Purpose**:\n
  2047. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  2048. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
  2049. * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
  2050. * result into the destination register. If saturation happens, an overflow flag OV will be set.
  2051. *
  2052. * **Description**:\n
  2053. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2054. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  2055. * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
  2056. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2057. * the OV flag is set to 1. The result after saturation is written to Rd.
  2058. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  2059. * set.
  2060. *
  2061. * **Operations**:\n
  2062. * ~~~
  2063. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
  2064. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
  2065. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
  2066. * If (0x8000 != aop | 0x8000 != bop) {
  2067. * Mresult = aop * bop;
  2068. * resQ31 = Mresult << 1;
  2069. * } else {
  2070. * resQ31 = 0x7FFFFFFF;
  2071. * OV = 1;
  2072. * }
  2073. * resadd = Rd + resQ31; // RV32
  2074. * resadd = Rd.W[0] + resQ31; // RV64
  2075. * if (resadd > (2^31)-1) {
  2076. * resadd = (2^31)-1;
  2077. * OV = 1;
  2078. * } else if (resadd < -2^31) {
  2079. * resadd = -2^31;
  2080. * OV = 1;
  2081. * }
  2082. * Rd = resadd; // RV32
  2083. * Rd = SE(resadd); // RV64
  2084. * ~~~
  2085. *
  2086. * \param [in] t long type of value stored in t
  2087. * \param [in] a unsigned int type of value stored in a
  2088. * \param [in] b unsigned int type of value stored in b
  2089. * \return value stored in long type
  2090. */
  2091. __STATIC_FORCEINLINE long __RV_KDMABT(long t, unsigned int a, unsigned int b)
  2092. { /* Thin wrapper: issues one `kdmabt` instruction; t is both the incoming destination value and the place the result is written. */
  2093. __ASM volatile("kdmabt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* %0 = t (read-write, "+r"), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  2094. return t; /* t as updated in place by kdmabt */
  2095. }
  2096. /* ===== Inline Function End for 3.34.2. KDMABT ===== */
  2097. /* ===== Inline Function Start for 3.34.3. KDMATT ===== */
  2098. /**
  2099. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  2100. * \brief KDMATT (Signed Saturating Double Multiply Addition T16 x T16)
  2101. * \details
  2102. * **Type**: DSP
  2103. *
  2104. * **Syntax**:\n
  2105. * ~~~
  2106. * KDMAxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2107. * ~~~
  2108. *
  2109. * **Purpose**:\n
  2110. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  2111. * of the lower 32-bit chunk in registers and then double and saturate the Q31 result, add the result
  2112. * with the sign-extended lower 32-bit chunk destination register and write the saturated addition
  2113. * result into the destination register. If saturation happens, an overflow flag OV will be set.
  2114. *
  2115. * **Description**:\n
  2116. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2117. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  2118. * doubled and saturated into a Q31 value. The Q31 value is then added with the content of Rd. If the
  2119. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2120. * the OV flag is set to 1. The result after saturation is written to Rd.
  2121. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  2122. * set.
  2123. *
  2124. * **Operations**:\n
  2125. * ~~~
  2126. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KDMABB
  2127. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KDMABT
  2128. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KDMATT
  2129. * If (0x8000 != aop | 0x8000 != bop) {
  2130. * Mresult = aop * bop;
  2131. * resQ31 = Mresult << 1;
  2132. * } else {
  2133. * resQ31 = 0x7FFFFFFF;
  2134. * OV = 1;
  2135. * }
  2136. * resadd = Rd + resQ31; // RV32
  2137. * resadd = Rd.W[0] + resQ31; // RV64
  2138. * if (resadd > (2^31)-1) {
  2139. * resadd = (2^31)-1;
  2140. * OV = 1;
  2141. * } else if (resadd < -2^31) {
  2142. * resadd = -2^31;
  2143. * OV = 1;
  2144. * }
  2145. * Rd = resadd; // RV32
  2146. * Rd = SE(resadd); // RV64
  2147. * ~~~
  2148. *
  2149. * \param [in] t long type of value stored in t
  2150. * \param [in] a unsigned int type of value stored in a
  2151. * \param [in] b unsigned int type of value stored in b
  2152. * \return value stored in long type
  2153. */
  2154. __STATIC_FORCEINLINE long __RV_KDMATT(long t, unsigned int a, unsigned int b)
  2155. { /* Thin wrapper: issues one `kdmatt` instruction; t is both the incoming destination value and the place the result is written. */
  2156. __ASM volatile("kdmatt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* %0 = t (read-write, "+r"), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  2157. return t; /* t as updated in place by kdmatt */
  2158. }
  2159. /* ===== Inline Function End for 3.34.3. KDMATT ===== */
  2160. /* ===== Inline Function Start for 3.35.1. KHM8 ===== */
  2161. /**
  2162. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  2163. * \brief KHM8 (SIMD Signed Saturating Q7 Multiply)
  2164. * \details
  2165. * **Type**: SIMD
  2166. *
  2167. * **Syntax**:\n
  2168. * ~~~
  2169. * KHM8 Rd, Rs1, Rs2
  2170. * KHMX8 Rd, Rs1, Rs2
  2171. * ~~~
  2172. *
  2173. * **Purpose**:\n
  2174. * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
  2175. * numbers again.
  2176. *
  2177. * **Description**:\n
  2178. * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
  2179. * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2180. * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
  2181. * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
  2182. * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2183. * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
  2184. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  2185. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  2186. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  2187. *
  2188. * **Operations**:\n
  2189. * ~~~
  2190. * if (is `KHM8`) {
  2191. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
  2192. * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
  2193. * } else if (is `KHMX8`) {
  2194. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
  2195. * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
  2196. * }
  2197. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2198. * if (0x80 != aop | 0x80 != bop) {
  2199. * res = (aop s* bop) >> 7;
  2200. * } else {
  2201. * res= 0x7F;
  2202. * OV = 1;
  2203. * }
  2204. * }
  2205. * Rd.H[x/2] = concat(rest, resb);
  2206. * for RV32, x=0,2
  2207. * for RV64, x=0,2,4,6
  2208. * ~~~
  2209. *
  2210. * \param [in] a unsigned long type of value stored in a
  2211. * \param [in] b unsigned long type of value stored in b
  2212. * \return value stored in unsigned long type
  2213. */
  2214. __STATIC_FORCEINLINE unsigned long __RV_KHM8(unsigned long a, unsigned long b)
  2215. { /* Thin wrapper: issues one `khm8` instruction on the packed operands a and b and returns the packed result. */
  2216. unsigned long result; /* bound to asm operand %0 (the destination register) */
  2217. __ASM volatile("khm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  2218. return result; /* value left in the destination register by khm8 */
  2219. }
  2220. /* ===== Inline Function End for 3.35.1. KHM8 ===== */
  2221. /* ===== Inline Function Start for 3.35.2. KHMX8 ===== */
  2222. /**
  2223. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  2224. * \brief KHMX8 (SIMD Signed Saturating Crossed Q7 Multiply)
  2225. * \details
  2226. * **Type**: SIMD
  2227. *
  2228. * **Syntax**:\n
  2229. * ~~~
  2230. * KHM8 Rd, Rs1, Rs2
  2231. * KHMX8 Rd, Rs1, Rs2
  2232. * ~~~
  2233. *
  2234. * **Purpose**:\n
  2235. * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
  2236. * numbers again.
  2237. *
  2238. * **Description**:\n
  2239. * For the `KHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
  2240. * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2241. * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
  2242. * For the `KHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
  2243. * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  2244. * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
  2245. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  2246. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  2247. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  2248. *
  2249. * **Operations**:\n
  2250. * ~~~
  2251. * if (is `KHM8`) {
  2252. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
  2253. * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
  2254. * } else if (is `KHMX8`) {
  2255. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // Rs1 top
  2256. * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // Rs1 bottom
  2257. * }
  2258. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2259. * if (0x80 != aop | 0x80 != bop) {
  2260. * res = (aop s* bop) >> 7;
  2261. * } else {
  2262. * res= 0x7F;
  2263. * OV = 1;
  2264. * }
  2265. * }
  2266. * Rd.H[x/2] = concat(rest, resb);
  2267. * for RV32, x=0,2
  2268. * for RV64, x=0,2,4,6
  2269. * ~~~
  2270. *
  2271. * \param [in] a unsigned long type of value stored in a
  2272. * \param [in] b unsigned long type of value stored in b
  2273. * \return value stored in unsigned long type
  2274. */
  2275. __STATIC_FORCEINLINE unsigned long __RV_KHMX8(unsigned long a, unsigned long b)
  2276. { /* Thin wrapper: issues one `khmx8` instruction on the packed operands a and b and returns the packed result. */
  2277. unsigned long result; /* bound to asm operand %0 (the destination register) */
  2278. __ASM volatile("khmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  2279. return result; /* value left in the destination register by khmx8 */
  2280. }
  2281. /* ===== Inline Function End for 3.35.2. KHMX8 ===== */
  2282. /* ===== Inline Function Start for 3.36.1. KHM16 ===== */
  2283. /**
  2284. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  2285. * \brief KHM16 (SIMD Signed Saturating Q15 Multiply)
  2286. * \details
  2287. * **Type**: SIMD
  2288. *
  2289. * **Syntax**:\n
  2290. * ~~~
  2291. * KHM16 Rd, Rs1, Rs2
  2292. * KHMX16 Rd, Rs1, Rs2
  2293. * ~~~
  2294. *
  2295. * **Purpose**:\n
  2296. * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
  2297. * Q15 numbers again.
  2298. *
  2299. * **Description**:\n
  2300. * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
  2301. * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
  2302. * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
  2303. * Rs2.
  2304. * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
  2305. * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
  2306. * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
  2307. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  2308. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  2309. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  2310. *
  2311. * **Operations**:\n
  2312. * ~~~
  2313. * if (is `KHM16`) {
  2314. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
  2315. * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
  2316. * } else if (is `KHMX16`) {
  2317. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
  2318. * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
  2319. * }
  2320. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2321. * if (0x8000 != aop | 0x8000 != bop) {
  2322. * res = (aop s* bop) >> 15;
  2323. * } else {
  2324. * res= 0x7FFF;
  2325. * OV = 1;
  2326. * }
  2327. * }
  2328. * Rd.W[x/2] = concat(rest, resb);
  2329. * for RV32: x=0
  2330. * for RV64: x=0,2
  2331. * ~~~
  2332. *
  2333. * \param [in] a unsigned long type of value stored in a
  2334. * \param [in] b unsigned long type of value stored in b
  2335. * \return value stored in unsigned long type
  2336. */
  2337. __STATIC_FORCEINLINE unsigned long __RV_KHM16(unsigned long a, unsigned long b)
  2338. { /* Thin wrapper: issues one `khm16` instruction on the packed operands a and b and returns the packed result. */
  2339. unsigned long result; /* bound to asm operand %0 (the destination register) */
  2340. __ASM volatile("khm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  2341. return result; /* value left in the destination register by khm16 */
  2342. }
  2343. /* ===== Inline Function End for 3.36.1. KHM16 ===== */
  2344. /* ===== Inline Function Start for 3.36.2. KHMX16 ===== */
  2345. /**
  2346. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  2347. * \brief KHMX16 (SIMD Signed Saturating Crossed Q15 Multiply)
  2348. * \details
  2349. * **Type**: SIMD
  2350. *
  2351. * **Syntax**:\n
  2352. * ~~~
  2353. * KHM16 Rd, Rs1, Rs2
  2354. * KHMX16 Rd, Rs1, Rs2
  2355. * ~~~
  2356. *
  2357. * **Purpose**:\n
  2358. * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
  2359. * Q15 numbers again.
  2360. *
  2361. * **Description**:\n
  2362. * For the `KHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
  2363. * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
  2364. * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
  2365. * Rs2.
  2366. * For the `KHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
  2367. * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
  2368. * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
  2369. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  2370. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  2371. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  2372. *
  2373. * **Operations**:\n
  2374. * ~~~
  2375. * if (is `KHM16`) {
  2376. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
  2377. * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
  2378. * } else if (is `KHMX16`) {
  2379. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // Rs1 top
  2380. * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // Rs1 bottom
  2381. * }
  2382. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  2383. * if (0x8000 != aop | 0x8000 != bop) {
  2384. * res = (aop s* bop) >> 15;
  2385. * } else {
  2386. * res= 0x7FFF;
  2387. * OV = 1;
  2388. * }
  2389. * }
  2390. * Rd.W[x/2] = concat(rest, resb);
  2391. * for RV32: x=0
  2392. * for RV64: x=0,2
  2393. * ~~~
  2394. *
  2395. * \param [in] a unsigned long type of value stored in a
  2396. * \param [in] b unsigned long type of value stored in b
  2397. * \return value stored in unsigned long type
  2398. */
  2399. __STATIC_FORCEINLINE unsigned long __RV_KHMX16(unsigned long a, unsigned long b)
  2400. { /* Thin wrapper: issues one `khmx16` instruction on the packed operands a and b and returns the packed result. */
  2401. unsigned long result; /* bound to asm operand %0 (the destination register) */
  2402. __ASM volatile("khmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* %0 = result (output-only), %1 = a, %2 = b. NOTE(review): volatile presumably keeps the instruction alive for its OV side effect - confirm */
  2403. return result; /* value left in the destination register by khmx16 */
  2404. }
  2405. /* ===== Inline Function End for 3.36.2. KHMX16 ===== */
  2406. /* ===== Inline Function Start for 3.37.1. KHMBB ===== */
  2407. /**
  2408. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  2409. * \brief KHMBB (Signed Saturating Half Multiply B16 x B16)
  2410. * \details
  2411. * **Type**: DSP
  2412. *
  2413. * **Syntax**:\n
  2414. * ~~~
  2415. * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2416. * ~~~
  2417. *
  2418. * **Purpose**:\n
  2419. * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
  2420. * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
  2421. * number again and saturate the Q15 result into the destination register. If saturation happens, an
  2422. * overflow flag OV will be set.
  2423. *
  2424. * **Description**:\n
  2425. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2426. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  2427. * right-shifted 15 bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
  2428. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  2429. * to 0x7FFF and the overflow flag OV will be set.
  2430. *
  2431. * **Operations**:\n
  2432. * ~~~
  2433. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
  2434. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
  2435. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
  2436. * If (0x8000 != aop | 0x8000 != bop) {
  2437. * Mresult[31:0] = aop * bop;
  2438. * res[15:0] = Mresult[30:15];
  2439. * } else {
  2440. * res[15:0] = 0x7FFF;
  2441. * OV = 1;
  2442. * }
  2443. * Rd = SE32(res[15:0]); // RV32
  2444. * Rd = SE64(res[15:0]); // RV64
  2445. * ~~~
  2446. *
  2447. * \param [in] a unsigned int type of value stored in a
  2448. * \param [in] b unsigned int type of value stored in b
  2449. * \return value stored in long type
  2450. */
  2451. __STATIC_FORCEINLINE long __RV_KHMBB(unsigned int a, unsigned int b)
  2452. {
  2453. long rd; /* output-only operand: receives Rd from the instruction */
  2454. __ASM volatile("khmbb %[rd], %[rs1], %[rs2]" : [rd] "=r"(rd) : [rs1] "r"(a), [rs2] "r"(b));
  2455. return rd;
  2456. }
  2457. /* ===== Inline Function End for 3.37.1. KHMBB ===== */
  2458. /* ===== Inline Function Start for 3.37.2. KHMBT ===== */
  2459. /**
  2460. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  2461. * \brief KHMBT (Signed Saturating Half Multiply B16 x T16)
  2462. * \details
  2463. * **Type**: DSP
  2464. *
  2465. * **Syntax**:\n
  2466. * ~~~
  2467. * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2468. * ~~~
  2469. *
  2470. * **Purpose**:\n
  2471. * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
  2472. * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
  2473. * number again and saturate the Q15 result into the destination register. If saturation happens, an
  2474. * overflow flag OV will be set.
  2475. *
  2476. * **Description**:\n
  2477. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2478. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  2479. * right-shifted 15 bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
  2480. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  2481. * to 0x7FFF and the overflow flag OV will be set.
  2482. *
  2483. * **Operations**:\n
  2484. * ~~~
  2485. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
  2486. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
  2487. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
  2488. * If (0x8000 != aop | 0x8000 != bop) {
  2489. * Mresult[31:0] = aop * bop;
  2490. * res[15:0] = Mresult[30:15];
  2491. * } else {
  2492. * res[15:0] = 0x7FFF;
  2493. * OV = 1;
  2494. * }
  2495. * Rd = SE32(res[15:0]); // RV32
  2496. * Rd = SE64(res[15:0]); // RV64
  2497. * ~~~
  2498. *
  2499. * \param [in] a unsigned int type of value stored in a
  2500. * \param [in] b unsigned int type of value stored in b
  2501. * \return value stored in long type
  2502. */
  2503. __STATIC_FORCEINLINE long __RV_KHMBT(unsigned int a, unsigned int b)
  2504. {
  2505. long rd; /* output-only operand: receives Rd from the instruction */
  2506. __ASM volatile("khmbt %[rd], %[rs1], %[rs2]" : [rd] "=r"(rd) : [rs1] "r"(a), [rs2] "r"(b));
  2507. return rd;
  2508. }
  2509. /* ===== Inline Function End for 3.37.2. KHMBT ===== */
  2510. /* ===== Inline Function Start for 3.37.3. KHMTT ===== */
  2511. /**
  2512. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  2513. * \brief KHMTT (Signed Saturating Half Multiply T16 x T16)
  2514. * \details
  2515. * **Type**: DSP
  2516. *
  2517. * **Syntax**:\n
  2518. * ~~~
  2519. * KHMxy Rd, Rs1, Rs2 (xy = BB, BT, TT)
  2520. * ~~~
  2521. *
  2522. * **Purpose**:\n
  2523. * Multiply the signed Q15 number contents of two 16-bit data in the corresponding portion
  2524. * of the lower 32-bit chunk in registers and then right-shift 15 bits to turn the Q30 result into a Q15
  2525. * number again and saturate the Q15 result into the destination register. If saturation happens, an
  2526. * overflow flag OV will be set.
  2527. *
  2528. * **Description**:\n
  2529. * Multiply the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs1 with
  2530. * the top or bottom 16-bit Q15 content of the lower 32-bit portion in Rs2. The Q30 result is then
  2531. * right-shifted 15 bits and saturated into a Q15 value. The Q15 value is then sign-extended and written into
  2532. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  2533. * to 0x7FFF and the overflow flag OV will be set.
  2534. *
  2535. * **Operations**:\n
  2536. * ~~~
  2537. * aop = Rs1.H[0]; bop = Rs2.H[0]; // KHMBB
  2538. * aop = Rs1.H[0]; bop = Rs2.H[1]; // KHMBT
  2539. * aop = Rs1.H[1]; bop = Rs2.H[1]; // KHMTT
  2540. * If (0x8000 != aop | 0x8000 != bop) {
  2541. * Mresult[31:0] = aop * bop;
  2542. * res[15:0] = Mresult[30:15];
  2543. * } else {
  2544. * res[15:0] = 0x7FFF;
  2545. * OV = 1;
  2546. * }
  2547. * Rd = SE32(res[15:0]); // RV32
  2548. * Rd = SE64(res[15:0]); // RV64
  2549. * ~~~
  2550. *
  2551. * \param [in] a unsigned int type of value stored in a
  2552. * \param [in] b unsigned int type of value stored in b
  2553. * \return value stored in long type
  2554. */
  2555. __STATIC_FORCEINLINE long __RV_KHMTT(unsigned int a, unsigned int b)
  2556. {
  2557. long rd; /* output-only operand: receives Rd from the instruction */
  2558. __ASM volatile("khmtt %[rd], %[rs1], %[rs2]" : [rd] "=r"(rd) : [rs1] "r"(a), [rs2] "r"(b));
  2559. return rd;
  2560. }
  2561. /* ===== Inline Function End for 3.37.3. KHMTT ===== */
  2562. /* ===== Inline Function Start for 3.38.1. KMABB ===== */
  2563. /**
  2564. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2565. * \brief KMABB (SIMD Saturating Signed Multiply Bottom Halfs & Add)
  2566. * \details
  2567. * **Type**: SIMD
  2568. *
  2569. * **Syntax**:\n
  2570. * ~~~
  2571. * KMABB Rd, Rs1, Rs2
  2572. * KMABT Rd, Rs1, Rs2
  2573. * KMATT Rd, Rs1, Rs2
  2574. * ~~~
  2575. *
  2576. * **Purpose**:\n
  2577. * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
  2578. * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
  2579. * third register. The addition result may be saturated and is written to the third register.
  2580. * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
  2581. * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
  2582. * * KMATT: rd.W[x] + top*top (per 32-bit element)
  2583. *
  2584. * **Description**:\n
  2585. * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2586. * the bottom 16-bit content of 32-bit elements in Rs2.
  2587. * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2588. * the top 16-bit content of 32-bit elements in Rs2.
  2589. * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2590. * top 16-bit content of 32-bit elements in Rs2.
  2591. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
  2592. * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
  2593. * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
  2594. * signed integers.
  2595. *
  2596. * **Operations**:\n
  2597. * ~~~
  2598. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
  2599. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
  2600. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
  2601. * if (res[x] > (2^31)-1) {
  2602. * res[x] = (2^31)-1;
  2603. * OV = 1;
  2604. * } else if (res[x] < -2^31) {
  2605. * res[x] = -2^31;
  2606. * OV = 1;
  2607. * }
  2608. * Rd.W[x] = res[x];
  2609. * for RV32: x=0
  2610. * for RV64: x=1...0
  2611. * ~~~
  2612. *
  2613. * \param [in] t long type of value stored in t
  2614. * \param [in] a unsigned long type of value stored in a
  2615. * \param [in] b unsigned long type of value stored in b
  2616. * \return value stored in long type
  2617. */
  2618. __STATIC_FORCEINLINE long __RV_KMABB(long t, unsigned long a, unsigned long b)
  2619. {
  2620. __ASM volatile("kmabb %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  2621. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  2622. }
  2623. /* ===== Inline Function End for 3.38.1. KMABB ===== */
  2624. /* ===== Inline Function Start for 3.38.2. KMABT ===== */
  2625. /**
  2626. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2627. * \brief KMABT (SIMD Saturating Signed Multiply Bottom & Top Halfs & Add)
  2628. * \details
  2629. * **Type**: SIMD
  2630. *
  2631. * **Syntax**:\n
  2632. * ~~~
  2633. * KMABB Rd, Rs1, Rs2
  2634. * KMABT Rd, Rs1, Rs2
  2635. * KMATT Rd, Rs1, Rs2
  2636. * ~~~
  2637. *
  2638. * **Purpose**:\n
  2639. * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
  2640. * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
  2641. * third register. The addition result may be saturated and is written to the third register.
  2642. * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
  2643. * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
  2644. * * KMATT: rd.W[x] + top*top (per 32-bit element)
  2645. *
  2646. * **Description**:\n
  2647. * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2648. * the bottom 16-bit content of 32-bit elements in Rs2.
  2649. * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2650. * the top 16-bit content of 32-bit elements in Rs2.
  2651. * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2652. * top 16-bit content of 32-bit elements in Rs2.
  2653. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
  2654. * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
  2655. * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
  2656. * signed integers.
  2657. *
  2658. * **Operations**:\n
  2659. * ~~~
  2660. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
  2661. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
  2662. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
  2663. * if (res[x] > (2^31)-1) {
  2664. * res[x] = (2^31)-1;
  2665. * OV = 1;
  2666. * } else if (res[x] < -2^31) {
  2667. * res[x] = -2^31;
  2668. * OV = 1;
  2669. * }
  2670. * Rd.W[x] = res[x];
  2671. * for RV32: x=0
  2672. * for RV64: x=1...0
  2673. * ~~~
  2674. *
  2675. * \param [in] t long type of value stored in t
  2676. * \param [in] a unsigned long type of value stored in a
  2677. * \param [in] b unsigned long type of value stored in b
  2678. * \return value stored in long type
  2679. */
  2680. __STATIC_FORCEINLINE long __RV_KMABT(long t, unsigned long a, unsigned long b)
  2681. {
  2682. __ASM volatile("kmabt %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  2683. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  2684. }
  2685. /* ===== Inline Function End for 3.38.2. KMABT ===== */
  2686. /* ===== Inline Function Start for 3.38.3. KMATT ===== */
  2687. /**
  2688. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2689. * \brief KMATT (SIMD Saturating Signed Multiply Top Halfs & Add)
  2690. * \details
  2691. * **Type**: SIMD
  2692. *
  2693. * **Syntax**:\n
  2694. * ~~~
  2695. * KMABB Rd, Rs1, Rs2
  2696. * KMABT Rd, Rs1, Rs2
  2697. * KMATT Rd, Rs1, Rs2
  2698. * ~~~
  2699. *
  2700. * **Purpose**:\n
  2701. * Multiply the signed 16-bit content of 32-bit elements in a register with the 16-bit content
  2702. * of 32-bit elements in another register and add the result to the content of 32-bit elements in the
  2703. * third register. The addition result may be saturated and is written to the third register.
  2704. * * KMABB: rd.W[x] + bottom*bottom (per 32-bit element)
  2705. * * KMABT: rd.W[x] + bottom*top (per 32-bit element)
  2706. * * KMATT: rd.W[x] + top*top (per 32-bit element)
  2707. *
  2708. * **Description**:\n
  2709. * For the `KMABB` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2710. * the bottom 16-bit content of 32-bit elements in Rs2.
  2711. * For the `KMABT` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2712. * the top 16-bit content of 32-bit elements in Rs2.
  2713. * For the `KMATT` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2714. * top 16-bit content of 32-bit elements in Rs2.
  2715. * The multiplication result is added to the content of 32-bit elements in Rd. If the addition result is
  2716. * beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to
  2717. * 1. The results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as
  2718. * signed integers.
  2719. *
  2720. * **Operations**:\n
  2721. * ~~~
  2722. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]); // KMABB
  2723. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[1]); // KMABT
  2724. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]); // KMATT
  2725. * if (res[x] > (2^31)-1) {
  2726. * res[x] = (2^31)-1;
  2727. * OV = 1;
  2728. * } else if (res[x] < -2^31) {
  2729. * res[x] = -2^31;
  2730. * OV = 1;
  2731. * }
  2732. * Rd.W[x] = res[x];
  2733. * for RV32: x=0
  2734. * for RV64: x=1...0
  2735. * ~~~
  2736. *
  2737. * \param [in] t long type of value stored in t
  2738. * \param [in] a unsigned long type of value stored in a
  2739. * \param [in] b unsigned long type of value stored in b
  2740. * \return value stored in long type
  2741. */
  2742. __STATIC_FORCEINLINE long __RV_KMATT(long t, unsigned long a, unsigned long b)
  2743. {
  2744. __ASM volatile("kmatt %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  2745. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  2746. }
  2747. /* ===== Inline Function End for 3.38.3. KMATT ===== */
  2748. /* ===== Inline Function Start for 3.39.1. KMADA ===== */
  2749. /**
  2750. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2751. * \brief KMADA (SIMD Saturating Signed Multiply Two Halfs and Two Adds)
  2752. * \details
  2753. * **Type**: SIMD
  2754. *
  2755. * **Syntax**:\n
  2756. * ~~~
  2757. * KMADA Rd, Rs1, Rs2
  2758. * KMAXDA Rd, Rs1, Rs2
  2759. * ~~~
  2760. *
  2761. * **Purpose**:\n
  2762. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
  2763. * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
  2764. * saturated.
  2765. * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
  2766. * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
  2767. *
  2768. * **Description**:\n
  2769. * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2770. * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
  2771. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2772. * elements in Rs2.
  2773. * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2774. * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
  2775. * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
  2776. * Rs2.
  2777. * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
  2778. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
  2779. * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  2780. * integers.
  2781. *
  2782. * **Operations**:\n
  2783. * ~~~
  2784. * // KMADA
  2785. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2786. * // KMAXDA
  2787. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2788. * if (res[x] > (2^31)-1) {
  2789. * res[x] = (2^31)-1;
  2790. * OV = 1;
  2791. * } else if (res[x] < -2^31) {
  2792. * res[x] = -2^31;
  2793. * OV = 1;
  2794. * }
  2795. * Rd.W[x] = res[x];
  2796. * for RV32: x=0
  2797. * for RV64: x=1...0
  2798. * ~~~
  2799. *
  2800. * \param [in] t long type of value stored in t
  2801. * \param [in] a unsigned long type of value stored in a
  2802. * \param [in] b unsigned long type of value stored in b
  2803. * \return value stored in long type
  2804. */
  2805. __STATIC_FORCEINLINE long __RV_KMADA(long t, unsigned long a, unsigned long b)
  2806. {
  2807. __ASM volatile("kmada %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  2808. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  2809. }
  2810. /* ===== Inline Function End for 3.39.1. KMADA ===== */
  2811. /* ===== Inline Function Start for 3.39.2. KMAXDA ===== */
  2812. /**
  2813. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2814. * \brief KMAXDA (SIMD Saturating Signed Crossed Multiply Two Halfs and Two Adds)
  2815. * \details
  2816. * **Type**: SIMD
  2817. *
  2818. * **Syntax**:\n
  2819. * ~~~
  2820. * KMADA Rd, Rs1, Rs2
  2821. * KMAXDA Rd, Rs1, Rs2
  2822. * ~~~
  2823. *
  2824. * **Purpose**:\n
  2825. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then adds
  2826. * the two 32-bit results and 32-bit elements in a third register together. The addition result may be
  2827. * saturated.
  2828. * * KMADA: rd.W[x] + top*top + bottom*bottom (per 32-bit element)
  2829. * * KMAXDA: rd.W[x] + top*bottom + bottom*top (per 32-bit element)
  2830. *
  2831. * **Description**:\n
  2832. * For the `KMADA` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2833. * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
  2834. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2835. * elements in Rs2.
  2836. * For the `KMAXDA` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2837. * bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of multiplying
  2838. * the bottom 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in
  2839. * Rs2.
  2840. * The result is added to the content of 32-bit elements in Rd. If the addition result is beyond the Q31
  2841. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The 32-bit
  2842. * results after saturation are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  2843. * integers.
  2844. *
  2845. * **Operations**:\n
  2846. * ~~~
  2847. * // KMADA
  2848. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2849. * // KMAXDA
  2850. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2851. * if (res[x] > (2^31)-1) {
  2852. * res[x] = (2^31)-1;
  2853. * OV = 1;
  2854. * } else if (res[x] < -2^31) {
  2855. * res[x] = -2^31;
  2856. * OV = 1;
  2857. * }
  2858. * Rd.W[x] = res[x];
  2859. * for RV32: x=0
  2860. * for RV64: x=1...0
  2861. * ~~~
  2862. *
  2863. * \param [in] t long type of value stored in t
  2864. * \param [in] a unsigned long type of value stored in a
  2865. * \param [in] b unsigned long type of value stored in b
  2866. * \return value stored in long type
  2867. */
  2868. __STATIC_FORCEINLINE long __RV_KMAXDA(long t, unsigned long a, unsigned long b)
  2869. {
  2870. __ASM volatile("kmaxda %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  2871. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  2872. }
  2873. /* ===== Inline Function End for 3.39.2. KMAXDA ===== */
  2874. /* ===== Inline Function Start for 3.40.1. KMADS ===== */
  2875. /**
  2876. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2877. * \brief KMADS (SIMD Saturating Signed Multiply Two Halfs & Subtract & Add)
  2878. * \details
  2879. * **Type**: SIMD
  2880. *
  2881. * **Syntax**:\n
  2882. * ~~~
  2883. * KMADS Rd, Rs1, Rs2
  2884. * KMADRS Rd, Rs1, Rs2
  2885. * KMAXDS Rd, Rs1, Rs2
  2886. * ~~~
  2887. *
  2888. * **Purpose**:\n
  2889. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
  2890. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  2891. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  2892. * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
  2893. * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
  2894. * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
  2895. *
  2896. * **Description**:\n
  2897. * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2898. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2899. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2900. * elements in Rs2.
  2901. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2902. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2903. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of
  2904. * 32-bit elements in Rs2.
  2905. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2906. * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2907. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  2908. * elements in Rs2.
  2909. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
  2910. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2911. * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
  2912. * and Rs2 are treated as signed integers.
  2913. *
  2914. * **Operations**:\n
  2915. * ~~~
  2916. * // KMADS
  2917. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2918. * // KMADRS
  2919. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  2920. * // KMAXDS
  2921. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2922. * if (res[x] > (2^31)-1) {
  2923. * res[x] = (2^31)-1;
  2924. * OV = 1;
  2925. * } else if (res[x] < -2^31) {
  2926. * res[x] = -2^31;
  2927. * OV = 1;
  2928. * }
  2929. * Rd.W[x] = res[x];
  2930. * for RV32: x=0
  2931. * for RV64: x=1...0
  2932. * ~~~
  2933. *
  2934. * \param [in] t long type of value stored in t
  2935. * \param [in] a unsigned long type of value stored in a
  2936. * \param [in] b unsigned long type of value stored in b
  2937. * \return value stored in long type
  2938. */
  2939. __STATIC_FORCEINLINE long __RV_KMADS(long t, unsigned long a, unsigned long b)
  2940. {
  2941. __ASM volatile("kmads %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  2942. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  2943. }
  2944. /* ===== Inline Function End for 3.40.1. KMADS ===== */
  2945. /* ===== Inline Function Start for 3.40.2. KMADRS ===== */
  2946. /**
  2947. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  2948. * \brief KMADRS (SIMD Saturating Signed Multiply Two Halfs & Reverse Subtract & Add)
  2949. * \details
  2950. * **Type**: SIMD
  2951. *
  2952. * **Syntax**:\n
  2953. * ~~~
  2954. * KMADS Rd, Rs1, Rs2
  2955. * KMADRS Rd, Rs1, Rs2
  2956. * KMAXDS Rd, Rs1, Rs2
  2957. * ~~~
  2958. *
  2959. * **Purpose**:\n
  2960. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
  2961. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  2962. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  2963. * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
  2964. * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
  2965. * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
  2966. *
  2967. * **Description**:\n
  2968. * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2969. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2970. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  2971. * elements in Rs2.
  2972. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  2973. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2974. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of
  2975. * 32-bit elements in Rs2.
  2976. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  2977. * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  2978. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  2979. * elements in Rs2.
  2980. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
  2981. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  2982. * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
  2983. * and Rs2 are treated as signed integers.
  2984. *
  2985. * **Operations**:\n
  2986. * ~~~
  2987. * // KMADS
  2988. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  2989. * // KMADRS
  2990. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  2991. * // KMAXDS
  2992. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  2993. * if (res[x] > (2^31)-1) {
  2994. * res[x] = (2^31)-1;
  2995. * OV = 1;
  2996. * } else if (res[x] < -2^31) {
  2997. * res[x] = -2^31;
  2998. * OV = 1;
  2999. * }
  3000. * Rd.W[x] = res[x];
  3001. * for RV32: x=0
  3002. * for RV64: x=1...0
  3003. * ~~~
  3004. *
  3005. * \param [in] t long type of value stored in t
  3006. * \param [in] a unsigned long type of value stored in a
  3007. * \param [in] b unsigned long type of value stored in b
  3008. * \return value stored in long type
  3009. */
  3010. __STATIC_FORCEINLINE long __RV_KMADRS(long t, unsigned long a, unsigned long b)
  3011. {
  3012. __ASM volatile("kmadrs %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  3013. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  3014. }
  3015. /* ===== Inline Function End for 3.40.2. KMADRS ===== */
  3016. /* ===== Inline Function Start for 3.40.3. KMAXDS ===== */
  3017. /**
  3018. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  3019. * \brief KMAXDS (SIMD Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
  3020. * \details
  3021. * **Type**: SIMD
  3022. *
  3023. * **Syntax**:\n
  3024. * ~~~
  3025. * KMADS Rd, Rs1, Rs2
  3026. * KMADRS Rd, Rs1, Rs2
  3027. * KMAXDS Rd, Rs1, Rs2
  3028. * ~~~
  3029. *
  3030. * **Purpose**:\n
  3031. * Do two signed 16-bit multiplications from 32-bit elements in two registers; and then
  3032. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  3033. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  3034. * * KMADS: rd.W[x] + (top*top - bottom*bottom) (per 32-bit element)
  3035. * * KMADRS: rd.W[x] + (bottom*bottom - top*top) (per 32-bit element)
  3036. * * KMAXDS: rd.W[x] + (top*bottom - bottom*top) (per 32-bit element)
  3037. *
  3038. * **Description**:\n
  3039. * For the `KMADS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  3040. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  3041. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  3042. * elements in Rs2.
  3043. * For the `KMADRS` instruction, it multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  3044. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  3045. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of
  3046. * 32-bit elements in Rs2.
  3047. * For the `KMAXDS` instruction, it multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  3048. * the top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  3049. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  3050. * elements in Rs2.
  3051. * The subtraction result is then added to the content of the corresponding 32-bit elements in Rd. If the
  3052. * addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and
  3053. * the OV bit is set to 1. The 32-bit results after saturation are written to Rd. The 16-bit contents of Rs1
  3054. * and Rs2 are treated as signed integers.
  3055. *
  3056. * **Operations**:\n
  3057. * ~~~
  3058. * // KMADS
  3059. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  3060. * // KMADRS
  3061. * res[x] = Rd.W[x] + (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  3062. * // KMAXDS
  3063. * res[x] = Rd.W[x] + (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  3064. * if (res[x] > (2^31)-1) {
  3065. * res[x] = (2^31)-1;
  3066. * OV = 1;
  3067. * } else if (res[x] < -2^31) {
  3068. * res[x] = -2^31;
  3069. * OV = 1;
  3070. * }
  3071. * Rd.W[x] = res[x];
  3072. * for RV32: x=0
  3073. * for RV64: x=1...0
  3074. * ~~~
  3075. *
  3076. * \param [in] t long type of value stored in t
  3077. * \param [in] a unsigned long type of value stored in a
  3078. * \param [in] b unsigned long type of value stored in b
  3079. * \return value stored in long type
  3080. */
  3081. __STATIC_FORCEINLINE long __RV_KMAXDS(long t, unsigned long a, unsigned long b)
  3082. {
  3083. __ASM volatile("kmaxds %[acc], %[rs1], %[rs2]" : [acc] "+r"(t) : [rs1] "r"(a), [rs2] "r"(b));
  3084. return t; /* t is a read-write operand: accumulator in, updated Rd out */
  3085. }
  3086. /* ===== Inline Function End for 3.40.3. KMAXDS ===== */
  3087. /* ===== Inline Function Start for 3.41. KMAR64 ===== */
  3088. /**
  3089. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  3090. * \brief KMAR64 (Signed Multiply and Saturating Add to 64-Bit Data)
  3091. * \details
  3092. * **Type**: DSP (64-bit Profile)
  3093. *
  3094. * **Syntax**:\n
  3095. * ~~~
  3096. * KMAR64 Rd, Rs1, Rs2
  3097. * ~~~
  3098. *
  3099. * **Purpose**:\n
  3100. * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
  3101. * results to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
  3102. * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
  3103. *
  3104. * **RV32 Description**:\n
  3105. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
  3106. * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
  3107. * Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the Q63 number range (-2^63 <=
  3108. * Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated result is written back
  3109. * to the even/odd pair of registers specified by Rd(4,1).
  3110. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  3111. * pair includes register 2d and 2d+1.
  3112. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  3113. * of the pair contains the low 32-bit of the result.
  3114. *
  3115. * **RV64 Description**:\n
  3116. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  3117. * adds the 64-bit multiplication results to the 64-bit signed data of Rd with unlimited precision. If the
  3118. * 64-bit addition result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range
  3119. * and the OV bit is set to 1. The saturated result is written back to Rd.
  3120. *
  3121. * **Operations**:\n
  3122. * ~~~
  3123. * RV32:
  3124. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  3125. * result = R[t_H].R[t_L] + (Rs1 * Rs2);
  3126. * if (result > (2^63)-1) {
  3127. * result = (2^63)-1; OV = 1;
  3128. * } else if (result < -2^63) {
  3129. * result = -2^63; OV = 1;
  3130. * }
  3131. * R[t_H].R[t_L] = result;
  3132. * RV64:
  3133. * // `result` has unlimited precision
  3134. * result = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
  3135. * if (result > (2^63)-1) {
  3136. * result = (2^63)-1; OV = 1;
  3137. * } else if (result < -2^63) {
  3138. * result = -2^63; OV = 1;
  3139. * }
  3140. * Rd = result;
  3141. * ~~~
  3142. *
  3143. * \param [in] t long long type of value stored in t
  3144. * \param [in] a long type of value stored in a
  3145. * \param [in] b long type of value stored in b
  3146. * \return value stored in long long type
  3147. */
  3148. __STATIC_FORCEINLINE long long __RV_KMAR64(long long t, long a, long b)
  3149. {
  3150. __ASM volatile("kmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3151. return t;
  3152. }
  3153. /* ===== Inline Function End for 3.41. KMAR64 ===== */
  3154. /* ===== Inline Function Start for 3.42.1. KMDA ===== */
  3155. /**
  3156. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  3157. * \brief KMDA (SIMD Signed Multiply Two Halfs and Add)
  3158. * \details
  3159. * **Type**: SIMD
  3160. *
  3161. * **Syntax**:\n
  3162. * ~~~
  3163. * KMDA Rd, Rs1, Rs2
  3164. * KMXDA Rd, Rs1, Rs2
  3165. * ~~~
  3166. *
  3167. * **Purpose**:\n
  3168. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  3169. * adds the two 32-bit results together. The addition result may be saturated.
  3170. * * KMDA: top*top + bottom*bottom (per 32-bit element)
  3171. * * KMXDA: top*bottom + bottom*top (per 32-bit element)
  3172. *
  3173. * **Description**:\n
  3174. * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3175. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3176. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
  3177. * bit elements of Rs2.
  3178. * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3179. * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3180. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
  3181. * 32-bit elements of Rs2.
  3182. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
  3183. * The final results are written to Rd. The 16-bit contents are treated as signed integers.
  3184. *
  3185. * **Operations**:\n
  3186. * ~~~
   * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
   *   // KMDA
   *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
   *   // KMXDA
   *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
   * } else {
   *   Rd.W[x] = 0x7fffffff;
   *   OV = 1;
   * }
   * for RV32: x=0
   * for RV64: x=1...0
  3191. * ~~~
  3192. *
  3193. * \param [in] a unsigned long type of value stored in a
  3194. * \param [in] b unsigned long type of value stored in b
  3195. * \return value stored in long type
  3196. */
  3197. __STATIC_FORCEINLINE long __RV_KMDA(unsigned long a, unsigned long b)
  3198. {
  3199. long result;
  3200. __ASM volatile("kmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  3201. return result;
  3202. }
  3203. /* ===== Inline Function End for 3.42.1. KMDA ===== */
  3204. /* ===== Inline Function Start for 3.42.2. KMXDA ===== */
  3205. /**
  3206. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  3207. * \brief KMXDA (SIMD Signed Crossed Multiply Two Halfs and Add)
  3208. * \details
  3209. * **Type**: SIMD
  3210. *
  3211. * **Syntax**:\n
  3212. * ~~~
  3213. * KMDA Rd, Rs1, Rs2
  3214. * KMXDA Rd, Rs1, Rs2
  3215. * ~~~
  3216. *
  3217. * **Purpose**:\n
  3218. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  3219. * adds the two 32-bit results together. The addition result may be saturated.
  3220. * * KMDA: top*top + bottom*bottom (per 32-bit element)
  3221. * * KMXDA: top*bottom + bottom*top (per 32-bit element)
  3222. *
  3223. * **Description**:\n
  3224. * For the `KMDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3225. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3226. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
  3227. * bit elements of Rs2.
  3228. * For the `KMXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  3229. * with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  3230. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
  3231. * 32-bit elements of Rs2.
  3232. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1.
  3233. * The final results are written to Rd. The 16-bit contents are treated as signed integers.
  3234. *
  3235. * **Operations**:\n
  3236. * ~~~
   * if ((Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000)) {
   *   // KMDA
   *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
   *   // KMXDA
   *   Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
   * } else {
   *   Rd.W[x] = 0x7fffffff;
   *   OV = 1;
   * }
   * for RV32: x=0
   * for RV64: x=1...0
  3241. * ~~~
  3242. *
  3243. * \param [in] a unsigned long type of value stored in a
  3244. * \param [in] b unsigned long type of value stored in b
  3245. * \return value stored in long type
  3246. */
  3247. __STATIC_FORCEINLINE long __RV_KMXDA(unsigned long a, unsigned long b)
  3248. {
  3249. long result;
  3250. __ASM volatile("kmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  3251. return result;
  3252. }
  3253. /* ===== Inline Function End for 3.42.2. KMXDA ===== */
  3254. /* ===== Inline Function Start for 3.43.1. KMMAC ===== */
  3255. /**
  3256. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3257. * \brief KMMAC (SIMD Saturating MSW Signed Multiply Word and Add)
  3258. * \details
  3259. * **Type**: SIMD
  3260. *
  3261. * **Syntax**:\n
  3262. * ~~~
  3263. * KMMAC Rd, Rs1, Rs2
  3264. * KMMAC.u Rd, Rs1, Rs2
  3265. * ~~~
  3266. *
  3267. * **Purpose**:\n
  3268. * Multiply the signed 32-bit integer elements of two registers and add the most significant
  3269. * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
  3270. * saturated first and then written back to the third register. The `.u` form performs an additional
  3271. * rounding up operation on the multiplication results before adding the most significant 32-bit part
  3272. * of the results.
  3273. *
  3274. * **Description**:\n
  3275. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3276. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  3277. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  3278. * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3279. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3280. * adding a 1 to bit 31 of the results.
  3281. *
  3282. * **Operations**:\n
  3283. * ~~~
  3284. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3285. * if (`.u` form) {
  3286. * Round[x][32:0] = Mres[x][63:31] + 1;
  3287. * res[x] = Rd.W[x] + Round[x][32:1];
  3288. * } else {
  3289. * res[x] = Rd.W[x] + Mres[x][63:32];
  3290. * }
  3291. * if (res[x] > (2^31)-1) {
  3292. * res[x] = (2^31)-1;
  3293. * OV = 1;
  3294. * } else if (res[x] < -2^31) {
  3295. * res[x] = -2^31;
  3296. * OV = 1;
  3297. * }
  3298. * Rd.W[x] = res[x];
  3299. * for RV32: x=0
  3300. * for RV64: x=1...0
  3301. * ~~~
  3302. *
  3303. * \param [in] t long type of value stored in t
  3304. * \param [in] a long type of value stored in a
  3305. * \param [in] b long type of value stored in b
  3306. * \return value stored in long type
  3307. */
  3308. __STATIC_FORCEINLINE long __RV_KMMAC(long t, long a, long b)
  3309. {
  3310. __ASM volatile("kmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3311. return t;
  3312. }
  3313. /* ===== Inline Function End for 3.43.1. KMMAC ===== */
  3314. /* ===== Inline Function Start for 3.43.2. KMMAC.u ===== */
  3315. /**
  3316. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3317. * \brief KMMAC.u (SIMD Saturating MSW Signed Multiply Word and Add with Rounding)
  3318. * \details
  3319. * **Type**: SIMD
  3320. *
  3321. * **Syntax**:\n
  3322. * ~~~
  3323. * KMMAC Rd, Rs1, Rs2
  3324. * KMMAC.u Rd, Rs1, Rs2
  3325. * ~~~
  3326. *
  3327. * **Purpose**:\n
  3328. * Multiply the signed 32-bit integer elements of two registers and add the most significant
  3329. * 32-bit results with the signed 32-bit integer elements of a third register. The addition results are
  3330. * saturated first and then written back to the third register. The `.u` form performs an additional
  3331. * rounding up operation on the multiplication results before adding the most significant 32-bit part
  3332. * of the results.
  3333. *
  3334. * **Description**:\n
  3335. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3336. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  3337. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  3338. * and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3339. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3340. * adding a 1 to bit 31 of the results.
  3341. *
  3342. * **Operations**:\n
  3343. * ~~~
  3344. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3345. * if (`.u` form) {
  3346. * Round[x][32:0] = Mres[x][63:31] + 1;
  3347. * res[x] = Rd.W[x] + Round[x][32:1];
  3348. * } else {
  3349. * res[x] = Rd.W[x] + Mres[x][63:32];
  3350. * }
  3351. * if (res[x] > (2^31)-1) {
  3352. * res[x] = (2^31)-1;
  3353. * OV = 1;
  3354. * } else if (res[x] < -2^31) {
  3355. * res[x] = -2^31;
  3356. * OV = 1;
  3357. * }
  3358. * Rd.W[x] = res[x];
  3359. * for RV32: x=0
  3360. * for RV64: x=1...0
  3361. * ~~~
  3362. *
  3363. * \param [in] t long type of value stored in t
  3364. * \param [in] a long type of value stored in a
  3365. * \param [in] b long type of value stored in b
  3366. * \return value stored in long type
  3367. */
  3368. __STATIC_FORCEINLINE long __RV_KMMAC_U(long t, long a, long b)
  3369. {
  3370. __ASM volatile("kmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3371. return t;
  3372. }
  3373. /* ===== Inline Function End for 3.43.2. KMMAC.u ===== */
  3374. /* ===== Inline Function Start for 3.44.1. KMMAWB ===== */
  3375. /**
  3376. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3377. * \brief KMMAWB (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add)
  3378. * \details
  3379. * **Type**: SIMD
  3380. *
  3381. * **Syntax**:\n
  3382. * ~~~
  3383. * KMMAWB Rd, Rs1, Rs2
  3384. * KMMAWB.u Rd, Rs1, Rs2
  3385. * ~~~
  3386. *
  3387. * **Purpose**:\n
  3388. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  3389. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3390. * the corresponding signed 32-bit elements of a third register. The addition result is written to the
  3391. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3392. * results from the most significant discarded bit before the addition operations.
  3393. *
  3394. * **Description**:\n
  3395. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  3396. * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
  3397. * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3398. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3399. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3400. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3401. * bit 15 of the result before the addition operations.
  3402. *
  3403. * **Operations**:\n
  3404. * ~~~
  3405. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  3406. * if (`.u` form) {
  3407. * Round[x][32:0] = Mres[x][47:15] + 1;
  3408. * res[x] = Rd.W[x] + Round[x][32:1];
  3409. * } else {
  3410. * res[x] = Rd.W[x] + Mres[x][47:16];
  3411. * }
  3412. * if (res[x] > (2^31)-1) {
  3413. * res[x] = (2^31)-1;
  3414. * OV = 1;
  3415. * } else if (res[x] < -2^31) {
  3416. * res[x] = -2^31;
  3417. * OV = 1;
  3418. * }
  3419. * Rd.W[x] = res[x];
  3420. * for RV32: x=0
  3421. * for RV64: x=1...0
  3422. * ~~~
  3423. *
  3424. * \param [in] t long type of value stored in t
  3425. * \param [in] a unsigned long type of value stored in a
  3426. * \param [in] b unsigned long type of value stored in b
  3427. * \return value stored in long type
  3428. */
  3429. __STATIC_FORCEINLINE long __RV_KMMAWB(long t, unsigned long a, unsigned long b)
  3430. {
  3431. __ASM volatile("kmmawb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3432. return t;
  3433. }
  3434. /* ===== Inline Function End for 3.44.1. KMMAWB ===== */
  3435. /* ===== Inline Function Start for 3.44.2. KMMAWB.u ===== */
  3436. /**
  3437. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3438. * \brief KMMAWB.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half and Add with Rounding)
  3439. * \details
  3440. * **Type**: SIMD
  3441. *
  3442. * **Syntax**:\n
  3443. * ~~~
  3444. * KMMAWB Rd, Rs1, Rs2
  3445. * KMMAWB.u Rd, Rs1, Rs2
  3446. * ~~~
  3447. *
  3448. * **Purpose**:\n
  3449. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  3450. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3451. * the corresponding signed 32-bit elements of a third register. The addition result is written to the
  3452. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3453. * results from the most significant discarded bit before the addition operations.
  3454. *
  3455. * **Description**:\n
  3456. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  3457. * of the corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication
  3458. * results with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3459. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3460. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3461. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3462. * bit 15 of the result before the addition operations.
  3463. *
  3464. * **Operations**:\n
  3465. * ~~~
  3466. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  3467. * if (`.u` form) {
  3468. * Round[x][32:0] = Mres[x][47:15] + 1;
  3469. * res[x] = Rd.W[x] + Round[x][32:1];
  3470. * } else {
  3471. * res[x] = Rd.W[x] + Mres[x][47:16];
  3472. * }
  3473. * if (res[x] > (2^31)-1) {
  3474. * res[x] = (2^31)-1;
  3475. * OV = 1;
  3476. * } else if (res[x] < -2^31) {
  3477. * res[x] = -2^31;
  3478. * OV = 1;
  3479. * }
  3480. * Rd.W[x] = res[x];
  3481. * for RV32: x=0
  3482. * for RV64: x=1...0
  3483. * ~~~
  3484. *
  3485. * \param [in] t long type of value stored in t
  3486. * \param [in] a unsigned long type of value stored in a
  3487. * \param [in] b unsigned long type of value stored in b
  3488. * \return value stored in long type
  3489. */
  3490. __STATIC_FORCEINLINE long __RV_KMMAWB_U(long t, unsigned long a, unsigned long b)
  3491. {
  3492. __ASM volatile("kmmawb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3493. return t;
  3494. }
  3495. /* ===== Inline Function End for 3.44.2. KMMAWB.u ===== */
  3496. /* ===== Inline Function Start for 3.45.1. KMMAWB2 ===== */
  3497. /**
  3498. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3499. * \brief KMMAWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add)
  3500. * \details
  3501. * **Type**: SIMD
  3502. *
  3503. * **Syntax**:\n
  3504. * ~~~
  3505. * KMMAWB2 Rd, Rs1, Rs2
  3506. * KMMAWB2.u Rd, Rs1, Rs2
  3507. * ~~~
  3508. *
  3509. * **Purpose**:\n
  3510. * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
  3511. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3512. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3513. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3514. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3515. * before the addition operations.
  3516. *
  3517. * **Description**:\n
  3518. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  3519. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3520. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3521. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3522. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3523. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3524. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3525. * the result before the addition operations.
  3526. *
  3527. * **Operations**:\n
  3528. * ~~~
  3529. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  3530. * addop.W[x] = 0x7fffffff;
  3531. * OV = 1;
  3532. * } else {
  3533. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  3534. * if (`.u` form) {
  3535. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3536. * }
  3537. * addop.W[x] = Mres[x][46:15]; // doubling
  3538. * }
  3539. * res[x] = Rd.W[x] + addop.W[x];
  3540. * if (res[x] > (2^31)-1) {
  3541. * res[x] = (2^31)-1;
  3542. * OV = 1;
  3543. * } else if (res[x] < -2^31) {
  3544. * res[x] = -2^31;
  3545. * OV = 1;
  3546. * }
  3547. * Rd.W[x] = res[x];
  3548. * for RV32: x=0
  3549. * for RV64: x=1...0
  3550. * ~~~
  3551. *
  3552. * \param [in] t long type of value stored in t
  3553. * \param [in] a unsigned long type of value stored in a
  3554. * \param [in] b unsigned long type of value stored in b
  3555. * \return value stored in long type
  3556. */
  3557. __STATIC_FORCEINLINE long __RV_KMMAWB2(long t, unsigned long a, unsigned long b)
  3558. {
  3559. __ASM volatile("kmmawb2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3560. return t;
  3561. }
  3562. /* ===== Inline Function End for 3.45.1. KMMAWB2 ===== */
  3563. /* ===== Inline Function Start for 3.45.2. KMMAWB2.u ===== */
  3564. /**
  3565. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3566. * \brief KMMAWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 and Add with Rounding)
  3567. * \details
  3568. * **Type**: SIMD
  3569. *
  3570. * **Syntax**:\n
  3571. * ~~~
  3572. * KMMAWB2 Rd, Rs1, Rs2
  3573. * KMMAWB2.u Rd, Rs1, Rs2
  3574. * ~~~
  3575. *
  3576. * **Purpose**:\n
  3577. * Multiply the signed 32-bit elements of one register and the bottom 16-bit of the
  3578. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3579. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3580. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3581. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3582. * before the addition operations.
  3583. *
  3584. * **Description**:\n
  3585. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  3586. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3587. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3588. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3589. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3590. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3591. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3592. * the result before the addition operations.
  3593. *
  3594. * **Operations**:\n
  3595. * ~~~
  3596. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  3597. * addop.W[x] = 0x7fffffff;
  3598. * OV = 1;
  3599. * } else {
  3600. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  3601. * if (`.u` form) {
  3602. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3603. * }
  3604. * addop.W[x] = Mres[x][46:15]; // doubling
  3605. * }
  3606. * res[x] = Rd.W[x] + addop.W[x];
  3607. * if (res[x] > (2^31)-1) {
  3608. * res[x] = (2^31)-1;
  3609. * OV = 1;
  3610. * } else if (res[x] < -2^31) {
  3611. * res[x] = -2^31;
  3612. * OV = 1;
  3613. * }
  3614. * Rd.W[x] = res[x];
  3615. * for RV32: x=0
  3616. * for RV64: x=1...0
  3617. * ~~~
  3618. *
  3619. * \param [in] t long type of value stored in t
  3620. * \param [in] a unsigned long type of value stored in a
  3621. * \param [in] b unsigned long type of value stored in b
  3622. * \return value stored in long type
  3623. */
  3624. __STATIC_FORCEINLINE long __RV_KMMAWB2_U(long t, unsigned long a, unsigned long b)
  3625. {
  3626. __ASM volatile("kmmawb2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3627. return t;
  3628. }
  3629. /* ===== Inline Function End for 3.45.2. KMMAWB2.u ===== */
  3630. /* ===== Inline Function Start for 3.46.1. KMMAWT ===== */
  3631. /**
  3632. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3633. * \brief KMMAWT (SIMD Saturating MSW Signed Multiply Word and Top Half and Add)
  3634. * \details
  3635. * **Type**: SIMD
  3636. *
  3637. * **Syntax**:\n
  3638. * ~~~
  3639. * KMMAWT Rd, Rs1, Rs2
  3640. * KMMAWT.u Rd, Rs1, Rs2
  3641. * ~~~
  3642. *
  3643. * **Purpose**:\n
  3644. * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
  3645. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3646. * the corresponding signed 32-bit elements of a third register. The addition results are written to the
  3647. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3648. * results from the most significant discarded bit before the addition operations.
  3649. *
  3650. * **Description**:\n
  3651. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
  3652. * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
  3653. * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3654. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3655. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3656. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3657. * bit 15 of the result before the addition operations.
  3658. *
  3659. * **Operations**:\n
  3660. * ~~~
  3661. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  3662. * if (`.u` form) {
  3663. * Round[x][32:0] = Mres[x][47:15] + 1;
  3664. * res[x] = Rd.W[x] + Round[x][32:1];
  3665. * } else {
  3666. * res[x] = Rd.W[x] + Mres[x][47:16];
  3667. * }
  3668. * if (res[x] > (2^31)-1) {
  3669. * res[x] = (2^31)-1;
  3670. * OV = 1;
  3671. * } else if (res[x] < -2^31) {
  3672. * res[x] = -2^31;
  3673. * OV = 1;
  3674. * }
  3675. * Rd.W[x] = res[x];
  3676. * for RV32: x=0
  3677. * for RV64: x=1...0
  3678. * ~~~
  3679. *
  3680. * \param [in] t long type of value stored in t
  3681. * \param [in] a unsigned long type of value stored in a
  3682. * \param [in] b unsigned long type of value stored in b
  3683. * \return value stored in long type
  3684. */
  3685. __STATIC_FORCEINLINE long __RV_KMMAWT(long t, unsigned long a, unsigned long b)
  3686. {
  3687. __ASM volatile("kmmawt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3688. return t;
  3689. }
  3690. /* ===== Inline Function End for 3.46.1. KMMAWT ===== */
  3691. /* ===== Inline Function Start for 3.46.2. KMMAWT.u ===== */
  3692. /**
  3693. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3694. * \brief KMMAWT.u (SIMD Saturating MSW Signed Multiply Word and Top Half and Add with Rounding)
  3695. * \details
  3696. * **Type**: SIMD
  3697. *
  3698. * **Syntax**:\n
  3699. * ~~~
  3700. * KMMAWT Rd, Rs1, Rs2
  3701. * KMMAWT.u Rd, Rs1, Rs2
  3702. * ~~~
  3703. *
  3704. * **Purpose**:\n
  3705. * Multiply the signed 32-bit integer elements of one register and the signed top 16-bit of the
  3706. * corresponding 32-bit elements of another register and add the most significant 32-bit results with
  3707. * the corresponding signed 32-bit elements of a third register. The addition results are written to the
  3708. * corresponding 32-bit elements of the third register. The `.u` form rounds up the multiplication
  3709. * results from the most significant discarded bit before the addition operations.
  3710. *
  3711. * **Description**:\n
  3712. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed top 16-bit of the
  3713. * corresponding 32-bit elements of Rs2 and adds the most significant 32-bit multiplication results
  3714. * with the corresponding signed 32-bit elements of Rd. If the addition result is beyond the Q31
  3715. * number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range and the OV bit is set to 1. The results
  3716. * after saturation are written to the corresponding 32-bit elements of Rd. The `.u` form of the
  3717. * instruction rounds up the most significant 32-bit of the 48-bit multiplication results by adding a 1 to
  3718. * bit 15 of the result before the addition operations.
  3719. *
  3720. * **Operations**:\n
  3721. * ~~~
  3722. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  3723. * if (`.u` form) {
  3724. * Round[x][32:0] = Mres[x][47:15] + 1;
  3725. * res[x] = Rd.W[x] + Round[x][32:1];
  3726. * } else {
  3727. * res[x] = Rd.W[x] + Mres[x][47:16];
  3728. * }
  3729. * if (res[x] > (2^31)-1) {
  3730. * res[x] = (2^31)-1;
  3731. * OV = 1;
  3732. * } else if (res[x] < -2^31) {
  3733. * res[x] = -2^31;
  3734. * OV = 1;
  3735. * }
  3736. * Rd.W[x] = res[x];
  3737. * for RV32: x=0
  3738. * for RV64: x=1...0
  3739. * ~~~
  3740. *
  3741. * \param [in] t long type of value stored in t
  3742. * \param [in] a unsigned long type of value stored in a
  3743. * \param [in] b unsigned long type of value stored in b
  3744. * \return value stored in long type
  3745. */
  3746. __STATIC_FORCEINLINE long __RV_KMMAWT_U(long t, unsigned long a, unsigned long b)
  3747. {
  3748. __ASM volatile("kmmawt.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3749. return t;
  3750. }
  3751. /* ===== Inline Function End for 3.46.2. KMMAWT.u ===== */
  3752. /* ===== Inline Function Start for 3.47.1. KMMAWT2 ===== */
  3753. /**
  3754. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3755. * \brief KMMAWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add)
  3756. * \details
  3757. * **Type**: SIMD
  3758. *
  3759. * **Syntax**:\n
  3760. * ~~~
  3761. * KMMAWT2 Rd, Rs1, Rs2
  3762. * KMMAWT2.u Rd, Rs1, Rs2
  3763. * ~~~
  3764. *
  3765. * **Purpose**:\n
  3766. * Multiply the signed 32-bit elements of one register and the top 16-bit of the
  3767. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3768. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3769. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3770. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3771. * before the addition operations.
  3772. *
  3773. * **Description**:\n
  3774. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  3775. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3776. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3777. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3778. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3779. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3780. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3781. * the result before the addition operations.
  3782. *
  3783. * **Operations**:\n
  3784. * ~~~
  3785. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  3786. * addop.W[x] = 0x7fffffff;
  3787. * OV = 1;
  3788. * } else {
  3789. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  3790. * if (`.u` form) {
  3791. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3792. * }
  3793. * addop.W[x] = Mres[x][46:15]; // doubling
  3794. * }
  3795. * res[x] = Rd.W[x] + addop.W[x];
  3796. * if (res[x] > (2^31)-1) {
  3797. * res[x] = (2^31)-1;
  3798. * OV = 1;
  3799. * } else if (res[x] < -2^31) {
  3800. * res[x] = -2^31;
  3801. * OV = 1;
  3802. * }
  3803. * Rd.W[x] = res[x];
  3804. * for RV32: x=0
  3805. * for RV64: x=1...0
  3806. * ~~~
  3807. *
  3808. * \param [in] t long type of value stored in t
  3809. * \param [in] a unsigned long type of value stored in a
  3810. * \param [in] b unsigned long type of value stored in b
  3811. * \return value stored in long type
  3812. */
  3813. __STATIC_FORCEINLINE long __RV_KMMAWT2(long t, unsigned long a, unsigned long b)
  3814. {
  3815. __ASM volatile("kmmawt2 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3816. return t;
  3817. }
  3818. /* ===== Inline Function End for 3.47.1. KMMAWT2 ===== */
  3819. /* ===== Inline Function Start for 3.47.2. KMMAWT2.u ===== */
  3820. /**
  3821. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  3822. * \brief KMMAWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 and Add with Rounding)
  3823. * \details
  3824. * **Type**: SIMD
  3825. *
  3826. * **Syntax**:\n
  3827. * ~~~
  3828. * KMMAWT2 Rd, Rs1, Rs2
  3829. * KMMAWT2.u Rd, Rs1, Rs2
  3830. * ~~~
  3831. *
  3832. * **Purpose**:\n
  3833. * Multiply the signed 32-bit elements of one register and the top 16-bit of the
  3834. * corresponding 32-bit elements of another register, double the multiplication results and add the
  3835. * saturated most significant 32-bit results with the corresponding signed 32-bit elements of a third
  3836. * register. The saturated addition result is written to the corresponding 32-bit elements of the third
  3837. * register. The `.u` form rounds up the multiplication results from the most significant discarded bit
  3838. * before the addition operations.
  3839. *
  3840. * **Description**:\n
  3841. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  3842. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  3843. * adds the saturated most significant 32-bit Q31 multiplication results with the corresponding signed
  3844. * 32-bit elements of Rd. If the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  3845. * saturated to the range and the OV bit is set to 1. The results after saturation are written to the
  3846. * corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the most significant
  3847. * 32-bit of the 48-bit Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of
  3848. * the result before the addition operations.
  3849. *
  3850. * **Operations**:\n
  3851. * ~~~
  3852. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  3853. * addop.W[x] = 0x7fffffff;
  3854. * OV = 1;
  3855. * } else {
  3856. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  3857. * if (`.u` form) {
  3858. * Mres[x][47:14] = Mres[x][47:14] + 1;
  3859. * }
  3860. * addop.W[x] = Mres[x][46:15]; // doubling
  3861. * }
  3862. * res[x] = Rd.W[x] + addop.W[x];
  3863. * if (res[x] > (2^31)-1) {
  3864. * res[x] = (2^31)-1;
  3865. * OV = 1;
  3866. * } else if (res[x] < -2^31) {
  3867. * res[x] = -2^31;
  3868. * OV = 1;
  3869. * }
  3870. * Rd.W[x] = res[x];
  3871. * for RV32: x=0
  3872. * for RV64: x=1...0
  3873. * ~~~
  3874. *
  3875. * \param [in] t long type of value stored in t
  3876. * \param [in] a unsigned long type of value stored in a
  3877. * \param [in] b unsigned long type of value stored in b
  3878. * \return value stored in long type
  3879. */
  3880. __STATIC_FORCEINLINE long __RV_KMMAWT2_U(long t, unsigned long a, unsigned long b)
  3881. {
  3882. __ASM volatile("kmmawt2.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3883. return t;
  3884. }
  3885. /* ===== Inline Function End for 3.47.2. KMMAWT2.u ===== */
  3886. /* ===== Inline Function Start for 3.48.1. KMMSB ===== */
  3887. /**
  3888. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3889. * \brief KMMSB (SIMD Saturating MSW Signed Multiply Word and Subtract)
  3890. * \details
  3891. * **Type**: SIMD
  3892. *
  3893. * **Syntax**:\n
  3894. * ~~~
  3895. * KMMSB Rd, Rs1, Rs2
  3896. * KMMSB.u Rd, Rs1, Rs2
  3897. * ~~~
  3898. *
  3899. * **Purpose**:\n
  3900. * Multiply the signed 32-bit integer elements of two registers and subtract the most
  3901. * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
  3902. * are written to the third register. The `.u` form performs an additional rounding up operation on
  3903. * the multiplication results before subtracting the most significant 32-bit part of the results.
  3904. *
  3905. * **Description**:\n
  3906. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3907. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  3908. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  3909. * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3910. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3911. * adding a 1 to bit 31 of the results.
  3912. *
  3913. * **Operations**:\n
  3914. * ~~~
  3915. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3916. * if (`.u` form) {
  3917. * Round[x][32:0] = Mres[x][63:31] + 1;
  3918. * res[x] = Rd.W[x] - Round[x][32:1];
  3919. * } else {
  3920. * res[x] = Rd.W[x] - Mres[x][63:32];
  3921. * }
  3922. * if (res[x] > (2^31)-1) {
  3923. * res[x] = (2^31)-1;
  3924. * OV = 1;
  3925. * } else if (res[x] < -2^31) {
  3926. * res[x] = -2^31;
  3927. * OV = 1;
  3928. * }
  3929. * Rd.W[x] = res[x];
  3930. * for RV32: x=0
  3931. * for RV64: x=1...0
  3932. * ~~~
  3933. *
  3934. * \param [in] t long type of value stored in t
  3935. * \param [in] a long type of value stored in a
  3936. * \param [in] b long type of value stored in b
  3937. * \return value stored in long type
  3938. */
  3939. __STATIC_FORCEINLINE long __RV_KMMSB(long t, long a, long b)
  3940. {
  3941. __ASM volatile("kmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  3942. return t;
  3943. }
  3944. /* ===== Inline Function End for 3.48.1. KMMSB ===== */
  3945. /* ===== Inline Function Start for 3.48.2. KMMSB.u ===== */
  3946. /**
  3947. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  3948. * \brief KMMSB.u (SIMD Saturating MSW Signed Multiply Word and Subtraction with Rounding)
  3949. * \details
  3950. * **Type**: SIMD
  3951. *
  3952. * **Syntax**:\n
  3953. * ~~~
  3954. * KMMSB Rd, Rs1, Rs2
  3955. * KMMSB.u Rd, Rs1, Rs2
  3956. * ~~~
  3957. *
  3958. * **Purpose**:\n
  3959. * Multiply the signed 32-bit integer elements of two registers and subtract the most
  3960. * significant 32-bit results from the signed 32-bit elements of a third register. The subtraction results
  3961. * are written to the third register. The `.u` form performs an additional rounding up operation on
  3962. * the multiplication results before subtracting the most significant 32-bit part of the results.
  3963. *
  3964. * **Description**:\n
  3965. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  3966. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  3967. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  3968. * range and the OV bit is set to 1. The results after saturation are written to Rd. The `.u` form of the
  3969. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  3970. * adding a 1 to bit 31 of the results.
  3971. *
  3972. * **Operations**:\n
  3973. * ~~~
  3974. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  3975. * if (`.u` form) {
  3976. * Round[x][32:0] = Mres[x][63:31] + 1;
  3977. * res[x] = Rd.W[x] - Round[x][32:1];
  3978. * } else {
  3979. * res[x] = Rd.W[x] - Mres[x][63:32];
  3980. * }
  3981. * if (res[x] > (2^31)-1) {
  3982. * res[x] = (2^31)-1;
  3983. * OV = 1;
  3984. * } else if (res[x] < -2^31) {
  3985. * res[x] = -2^31;
  3986. * OV = 1;
  3987. * }
  3988. * Rd.W[x] = res[x];
  3989. * for RV32: x=0
  3990. * for RV64: x=1...0
  3991. * ~~~
  3992. *
  3993. * \param [in] t long type of value stored in t
  3994. * \param [in] a long type of value stored in a
  3995. * \param [in] b long type of value stored in b
  3996. * \return value stored in long type
  3997. */
  3998. __STATIC_FORCEINLINE long __RV_KMMSB_U(long t, long a, long b)
  3999. {
  4000. __ASM volatile("kmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  4001. return t;
  4002. }
  4003. /* ===== Inline Function End for 3.48.2. KMMSB.u ===== */
  4004. /* ===== Inline Function Start for 3.49.1. KMMWB2 ===== */
  4005. /**
  4006. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4007. * \brief KMMWB2 (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2)
  4008. * \details
  4009. * **Type**: SIMD
  4010. *
  4011. * **Syntax**:\n
  4012. * ~~~
  4013. * KMMWB2 Rd, Rs1, Rs2
  4014. * KMMWB2.u Rd, Rs1, Rs2
  4015. * ~~~
  4016. *
  4017. * **Purpose**:\n
  4018. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  4019. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4020. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4021. * form rounds up the results from the most significant discarded bit.
  4022. *
  4023. * **Description**:\n
  4024. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  4025. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4026. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4027. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4028. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4029. *
  4030. * **Operations**:\n
  4031. * ~~~
  4032. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  4033. * Rd.W[x] = 0x7fffffff;
  4034. * OV = 1;
  4035. * } else {
  4036. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  4037. * if (`.u` form) {
  4038. * Round[x][32:0] = Mres[x][46:14] + 1;
  4039. * Rd.W[x] = Round[x][32:1];
  4040. * } else {
  4041. * Rd.W[x] = Mres[x][46:15];
  4042. * }
  4043. * }
  4044. * for RV32: x=0
  4045. * for RV64: x=1...0
  4046. * ~~~
  4047. *
  4048. * \param [in] a long type of value stored in a
  4049. * \param [in] b unsigned long type of value stored in b
  4050. * \return value stored in long type
  4051. */
  4052. __STATIC_FORCEINLINE long __RV_KMMWB2(long a, unsigned long b)
  4053. {
  4054. long result;
  4055. __ASM volatile("kmmwb2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4056. return result;
  4057. }
  4058. /* ===== Inline Function End for 3.49.1. KMMWB2 ===== */
  4059. /* ===== Inline Function Start for 3.49.2. KMMWB2.u ===== */
  4060. /**
  4061. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4062. * \brief KMMWB2.u (SIMD Saturating MSW Signed Multiply Word and Bottom Half & 2 with Rounding)
  4063. * \details
  4064. * **Type**: SIMD
  4065. *
  4066. * **Syntax**:\n
  4067. * ~~~
  4068. * KMMWB2 Rd, Rs1, Rs2
  4069. * KMMWB2.u Rd, Rs1, Rs2
  4070. * ~~~
  4071. *
  4072. * **Purpose**:\n
  4073. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  4074. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4075. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4076. * form rounds up the results from the most significant discarded bit.
  4077. *
  4078. * **Description**:\n
  4079. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed bottom 16-bit Q15
  4080. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4081. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4082. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4083. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4084. *
  4085. * **Operations**:\n
  4086. * ~~~
  4087. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[0] == 0x8000)) {
  4088. * Rd.W[x] = 0x7fffffff;
  4089. * OV = 1;
  4090. * } else {
  4091. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[0];
  4092. * if (`.u` form) {
  4093. * Round[x][32:0] = Mres[x][46:14] + 1;
  4094. * Rd.W[x] = Round[x][32:1];
  4095. * } else {
  4096. * Rd.W[x] = Mres[x][46:15];
  4097. * }
  4098. * }
  4099. * for RV32: x=0
  4100. * for RV64: x=1...0
  4101. * ~~~
  4102. *
  4103. * \param [in] a long type of value stored in a
  4104. * \param [in] b unsigned long type of value stored in b
  4105. * \return value stored in long type
  4106. */
  4107. __STATIC_FORCEINLINE long __RV_KMMWB2_U(long a, unsigned long b)
  4108. {
  4109. long result;
  4110. __ASM volatile("kmmwb2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4111. return result;
  4112. }
  4113. /* ===== Inline Function End for 3.49.2. KMMWB2.u ===== */
  4114. /* ===== Inline Function Start for 3.50.1. KMMWT2 ===== */
  4115. /**
  4116. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4117. * \brief KMMWT2 (SIMD Saturating MSW Signed Multiply Word and Top Half & 2)
  4118. * \details
  4119. * **Type**: SIMD
  4120. *
  4121. * **Syntax**:\n
  4122. * ~~~
  4123. * KMMWT2 Rd, Rs1, Rs2
  4124. * KMMWT2.u Rd, Rs1, Rs2
  4125. * ~~~
  4126. *
  4127. * **Purpose**:\n
  4128. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  4129. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4130. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4131. * form rounds up the results from the most significant discarded bit.
  4132. *
  4133. * **Description**:\n
  4134. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  4135. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4136. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4137. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4138. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4139. *
  4140. * **Operations**:\n
  4141. * ~~~
  4142. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  4143. * Rd.W[x] = 0x7fffffff;
  4144. * OV = 1;
  4145. * } else {
  4146. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  4147. * if (`.u` form) {
  4148. * Round[x][32:0] = Mres[x][46:14] + 1;
  4149. * Rd.W[x] = Round[x][32:1];
  4150. * } else {
  4151. * Rd.W[x] = Mres[x][46:15];
  4152. * }
  4153. * }
  4154. * for RV32: x=0
  4155. * for RV64: x=1...0
  4156. * ~~~
  4157. *
  4158. * \param [in] a long type of value stored in a
  4159. * \param [in] b unsigned long type of value stored in b
  4160. * \return value stored in long type
  4161. */
  4162. __STATIC_FORCEINLINE long __RV_KMMWT2(long a, unsigned long b)
  4163. {
  4164. long result;
  4165. __ASM volatile("kmmwt2 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4166. return result;
  4167. }
  4168. /* ===== Inline Function End for 3.50.1. KMMWT2 ===== */
  4169. /* ===== Inline Function Start for 3.50.2. KMMWT2.u ===== */
  4170. /**
  4171. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  4172. * \brief KMMWT2.u (SIMD Saturating MSW Signed Multiply Word and Top Half & 2 with Rounding)
  4173. * \details
  4174. * **Type**: SIMD
  4175. *
  4176. * **Syntax**:\n
  4177. * ~~~
  4178. * KMMWT2 Rd, Rs1, Rs2
  4179. * KMMWT2.u Rd, Rs1, Rs2
  4180. * ~~~
  4181. *
  4182. * **Purpose**:\n
  4183. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  4184. * corresponding 32-bit elements of another register, double the multiplication results and write the
  4185. * saturated most significant 32-bit results to the corresponding 32-bit elements of a register. The `.u`
  4186. * form rounds up the results from the most significant discarded bit.
  4187. *
  4188. * **Description**:\n
  4189. * This instruction multiplies the signed 32-bit Q31 elements of Rs1 with the signed top 16-bit Q15
  4190. * content of the corresponding 32-bit elements of Rs2, doubles the Q46 results to Q47 numbers and
  4191. * writes the saturated most significant 32-bit Q31 multiplication results to the corresponding 32-bit
  4192. * elements of Rd. The `.u` form of the instruction rounds up the most significant 32-bit of the 48-bit
  4193. * Q47 multiplication results by adding a 1 to bit 15 (i.e., bit 14 before doubling) of the results.
  4194. *
  4195. * **Operations**:\n
  4196. * ~~~
  4197. * if ((Rs1.W[x] == 0x80000000) & (Rs2.W[x].H[1] == 0x8000)) {
  4198. * Rd.W[x] = 0x7fffffff;
  4199. * OV = 1;
  4200. * } else {
  4201. * Mres[x][47:0] = Rs1.W[x] s* Rs2.W[x].H[1];
  4202. * if (`.u` form) {
  4203. * Round[x][32:0] = Mres[x][46:14] + 1;
  4204. * Rd.W[x] = Round[x][32:1];
  4205. * } else {
  4206. * Rd.W[x] = Mres[x][46:15];
  4207. * }
  4208. * }
  4209. * for RV32: x=0
  4210. * for RV64: x=1...0
  4211. * ~~~
  4212. *
  4213. * \param [in] a long type of value stored in a
  4214. * \param [in] b unsigned long type of value stored in b
  4215. * \return value stored in long type
  4216. */
  4217. __STATIC_FORCEINLINE long __RV_KMMWT2_U(long a, unsigned long b)
  4218. {
  4219. long result;
  4220. __ASM volatile("kmmwt2.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4221. return result;
  4222. }
  4223. /* ===== Inline Function End for 3.50.2. KMMWT2.u ===== */
  4224. /* ===== Inline Function Start for 3.51.1. KMSDA ===== */
  4225. /**
  4226. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  4227. * \brief KMSDA (SIMD Saturating Signed Multiply Two Halfs & Add & Subtract)
  4228. * \details
  4229. * **Type**: SIMD
  4230. *
  4231. * **Syntax**:\n
  4232. * ~~~
  4233. * KMSDA Rd, Rs1, Rs2
  4234. * KMSXDA Rd, Rs1, Rs2
  4235. * ~~~
  4236. *
  4237. * **Purpose**:\n
  4238. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  4239. * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
  4240. * subtraction result may be saturated.
  4241. * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
  4242. * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
  4243. *
  4244. * **Description**:\n
  4245. * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4246. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  4247. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  4248. * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4249. * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
  4250. * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  4251. * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
  4252. * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  4253. * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
  4254. * 16-bit contents are treated as signed integers.
  4255. *
  4256. * **Operations**:\n
  4257. * ~~~
  4258. * // KMSDA
  4259. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  4260. * // KMSXDA
  4261. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  4262. * if (res[x] > (2^31)-1) {
  4263. * res[x] = (2^31)-1;
  4264. * OV = 1;
  4265. * } else if (res[x] < -2^31) {
  4266. * res[x] = -2^31;
  4267. * OV = 1;
  4268. * }
  4269. * Rd.W[x] = res[x];
  4270. * for RV32: x=0
  4271. * for RV64: x=1...0
  4272. * ~~~
  4273. *
  4274. * \param [in] t long type of value stored in t
  4275. * \param [in] a unsigned long type of value stored in a
  4276. * \param [in] b unsigned long type of value stored in b
  4277. * \return value stored in long type
  4278. */
  4279. __STATIC_FORCEINLINE long __RV_KMSDA(long t, unsigned long a, unsigned long b)
  4280. {
  4281. __ASM volatile("kmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  4282. return t;
  4283. }
  4284. /* ===== Inline Function End for 3.51.1. KMSDA ===== */
  4285. /* ===== Inline Function Start for 3.51.2. KMSXDA ===== */
  4286. /**
  4287. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  4288. * \brief KMSXDA (SIMD Saturating Signed Crossed Multiply Two Halfs & Add & Subtract)
  4289. * \details
  4290. * **Type**: SIMD
  4291. *
  4292. * **Syntax**:\n
  4293. * ~~~
  4294. * KMSDA Rd, Rs1, Rs2
  4295. * KMSXDA Rd, Rs1, Rs2
  4296. * ~~~
  4297. *
  4298. * **Purpose**:\n
  4299. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  4300. * subtracts the two 32-bit results from the corresponding 32-bit elements of a third register. The
  4301. * subtraction result may be saturated.
  4302. * * KMSDA: rd.W[x] - top*top - bottom*bottom (per 32-bit element)
  4303. * * KMSXDA: rd.W[x] - top*bottom - bottom*top (per 32-bit element)
  4304. *
  4305. * **Description**:\n
  4306. * For the `KMSDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4307. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  4308. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  4309. * For the `KMSXDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  4310. * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
  4311. * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  4312. * The two 32-bit multiplication results are then subtracted from the content of the corresponding 32-
  4313. * bit elements of Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is
  4314. * saturated to the range and the OV bit is set to 1. The results after saturation are written to Rd. The
  4315. * 16-bit contents are treated as signed integers.
  4316. *
  4317. * **Operations**:\n
  4318. * ~~~
  4319. * // KMSDA
  4320. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  4321. * // KMSXDA
  4322. * res[x] = Rd.W[x] - (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  4323. * if (res[x] > (2^31)-1) {
  4324. * res[x] = (2^31)-1;
  4325. * OV = 1;
  4326. * } else if (res[x] < -2^31) {
  4327. * res[x] = -2^31;
  4328. * OV = 1;
  4329. * }
  4330. * Rd.W[x] = res[x];
  4331. * for RV32: x=0
  4332. * for RV64: x=1...0
  4333. * ~~~
  4334. *
  4335. * \param [in] t long type of value stored in t
  4336. * \param [in] a unsigned long type of value stored in a
  4337. * \param [in] b unsigned long type of value stored in b
  4338. * \return value stored in long type
  4339. */
  4340. __STATIC_FORCEINLINE long __RV_KMSXDA(long t, unsigned long a, unsigned long b)
  4341. {
  4342. __ASM volatile("kmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  4343. return t;
  4344. }
  4345. /* ===== Inline Function End for 3.51.2. KMSXDA ===== */
  4346. /* ===== Inline Function Start for 3.52. KMSR64 ===== */
  4347. /**
  4348. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  4349. * \brief KMSR64 (Signed Multiply and Saturating Subtract from 64-Bit Data)
  4350. * \details
  4351. * **Type**: DSP (64-bit Profile)
  4352. *
  4353. * **Syntax**:\n
  4354. * ~~~
  4355. * KMSR64 Rd, Rs1, Rs2
  4356. * ~~~
  4357. *
  4358. * **Purpose**:\n
  4359. * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
  4360. * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
  4361. * saturated to the Q63 range and written back to the pair of registers (RV32) or the register (RV64).
  4362. *
  4363. * **RV32 Description**:\n
  4364. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
  4365. * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
  4366. * specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the Q63
  4367. * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The saturated
  4368. * result is written back to the even/odd pair of registers specified by Rd(4,1).
  4369. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  4370. * includes register 2d and 2d+1.
  4371. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  4372. * of the pair contains the low 32-bit of the result.
  4373. *
  4374. * **RV64 Description**:\n
  4375. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  4376. * subtracts the 64-bit multiplication results from the 64-bit signed data in Rd with unlimited
  4377. * precision. If the 64-bit subtraction result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
  4378. * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
  4379. *
  4380. * **Operations**:\n
  4381. * ~~~
  4382. * RV32:
  4383. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  4384. * result = R[t_H].R[t_L] - (Rs1 * Rs2);
  4385. * if (result > (2^63)-1) {
  4386. * result = (2^63)-1; OV = 1;
  4387. * } else if (result < -2^63) {
  4388. * result = -2^63; OV = 1;
  4389. * }
  4390. * R[t_H].R[t_L] = result;
  4391. * RV64:
  4392. * // `result` has unlimited precision
  4393. * result = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
  4394. * if (result > (2^63)-1) {
  4395. * result = (2^63)-1; OV = 1;
  4396. * } else if (result < -2^63) {
  4397. * result = -2^63; OV = 1;
  4398. * }
  4399. * Rd = result;
  4400. * ~~~
  4401. *
  4402. * \param [in] t long long type of value stored in t
  4403. * \param [in] a long type of value stored in a
  4404. * \param [in] b long type of value stored in b
  4405. * \return value stored in long long type
  4406. */
  4407. __STATIC_FORCEINLINE long long __RV_KMSR64(long long t, long a, long b)
  4408. {
  4409. __ASM volatile("kmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  4410. return t;
  4411. }
  4412. /* ===== Inline Function End for 3.52. KMSR64 ===== */
  4413. /* ===== Inline Function Start for 3.53. KSLLW ===== */
  4414. /**
  4415. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  4416. * \brief KSLLW (Saturating Shift Left Logical for Word)
  4417. * \details
  4418. * **Type**: DSP
  4419. *
  4420. * **Syntax**:\n
  4421. * ~~~
  4422. * KSLLW Rd, Rs1, Rs2
  4423. * ~~~
  4424. *
  4425. * **Purpose**:\n
  4426. * Do logical left shift operation with saturation on a 32-bit word. The shift amount is a
  4427. * variable from a GPR.
  4428. *
  4429. * **Description**:\n
  4430. * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
  4431. * zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register. Any
  4432. * shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated
  4433. * to -2^31. And the saturated result is sign-extended and written to Rd. If any saturation is performed,
  4434. * set OV bit to 1.
  4435. *
  4436. * **Operations**:\n
  4437. * ~~~
  4438. * sa = Rs2[4:0];
  4439. * res[(31+sa):0] = Rs1.W[0] << sa;
  4440. * if (res > (2^31)-1) {
  4441. * res = 0x7fffffff; OV = 1;
  4442. * } else if (res < -2^31) {
  4443. * res = 0x80000000; OV = 1;
  4444. * }
  4445. * Rd[31:0] = res[31:0]; // RV32
  4446. * Rd[63:0] = SE(res[31:0]); // RV64
  4447. * ~~~
  4448. *
  4449. * \param [in] a long type of value stored in a
  4450. * \param [in] b unsigned int type of value stored in b
  4451. * \return value stored in long type
  4452. */
  4453. __STATIC_FORCEINLINE long __RV_KSLLW(long a, unsigned int b)
  4454. {
  4455. long result;
  4456. __ASM volatile("ksllw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4457. return result;
  4458. }
  4459. /* ===== Inline Function End for 3.53. KSLLW ===== */
  4460. /* ===== Inline Function Start for 3.54. KSLLIW ===== */
  4461. /**
  4462. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  4463. * \brief KSLLIW (Saturating Shift Left Logical Immediate for Word)
  4464. * \details
  4465. * **Type**: DSP
  4466. *
  4467. * **Syntax**:\n
  4468. * ~~~
  4469. * KSLLIW Rd, Rs1, imm5u
  4470. * ~~~
  4471. *
  4472. * **Purpose**:\n
  4473. * Do logical left shift operation with saturation on a 32-bit word. The shift amount is an
  4474. * immediate value.
  4475. *
  4476. * **Description**:\n
  4477. * The first word data in Rs1 is left-shifted logically. The shifted out bits are filled with
  4478. * zero and the shift amount is specified by the imm5u constant. Any shifted value greater than 2^31-1 is
  4479. * saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated result is
  4480. * sign-extended and written to Rd. If any saturation is performed, set OV bit to 1.
  4481. *
  4482. * **Operations**:\n
  4483. * ~~~
  4484. * sa = imm5u;
  4485. * res[(31+sa):0] = Rs1.W[0] << sa;
  4486. * if (res > (2^31)-1) {
  4487. * res = 0x7fffffff; OV = 1;
  4488. * } else if (res < -2^31) {
  4489. * res = 0x80000000; OV = 1;
  4490. * }
  4491. * Rd[31:0] = res[31:0]; // RV32
  4492. * Rd[63:0] = SE(res[31:0]); // RV64
  4493. * ~~~
  4494. *
  4495. * \param [in] a long type of value stored in a
  4496. * \param [in] b unsigned int type of value stored in b
  4497. * \return value stored in long type
  4498. */
  /* Statement-expression wrapper that emits one KSLLIW instruction.
   * b is bound with the "K" immediate constraint, so it has to be an integer
   * constant expression (imm5u in the Syntax section above).
   * a is captured into __a BEFORE `result` is declared: with the opposite
   * order a call such as __RV_KSLLIW(result, 1) would read the macro's own
   * uninitialized `result` instead of the caller's variable. */
  #define __RV_KSLLIW(a, b) \
      ({ \
          long __a = (long)(a); \
          long result; \
          __ASM volatile("kslliw %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
          result; \
      })
  4506. /* ===== Inline Function End for 3.54. KSLLIW ===== */
  4507. /* ===== Inline Function Start for 3.55. KSLL8 ===== */
  4508. /**
  4509. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4510. * \brief KSLL8 (SIMD 8-bit Saturating Shift Left Logical)
  4511. * \details
  4512. * **Type**: SIMD
  4513. *
  4514. * **Syntax**:\n
  4515. * ~~~
  4516. * KSLL8 Rd, Rs1, Rs2
  4517. * ~~~
  4518. *
  4519. * **Purpose**:\n
  4520. * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
  4521. * amount is a variable from a GPR.
  4522. *
  4523. * **Description**:\n
  4524. * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4525. * with zero and the shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
  4526. * Any shifted value greater than 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is
  4527. * saturated to -2^7. And the saturated results are written to Rd. If any saturation is performed, set OV
  4528. * bit to 1.
  4529. *
  4530. * **Operations**:\n
  4531. * ~~~
  4532. * sa = Rs2[2:0];
  4533. * if (sa != 0) {
  4534. * res[(7+sa):0] = Rs1.B[x] << sa;
  4535. * if (res > (2^7)-1) {
  4536. * res = 0x7f; OV = 1;
  4537. * } else if (res < -2^7) {
  4538. * res = 0x80; OV = 1;
  4539. * }
  4540. * Rd.B[x] = res[7:0];
  4541. * } else {
  4542. * Rd = Rs1;
  4543. * }
  4544. * for RV32: x=3...0,
  4545. * for RV64: x=7...0
  4546. * ~~~
  4547. *
  4548. * \param [in] a unsigned long type of value stored in a
  4549. * \param [in] b unsigned int type of value stored in b
  4550. * \return value stored in unsigned long type
  4551. */
  4552. __STATIC_FORCEINLINE unsigned long __RV_KSLL8(unsigned long a, unsigned int b)
  4553. {
  4554. unsigned long result;
  4555. __ASM volatile("ksll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4556. return result;
  4557. }
  4558. /* ===== Inline Function End for 3.55. KSLL8 ===== */
  4559. /* ===== Inline Function Start for 3.56. KSLLI8 ===== */
  4560. /**
  4561. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4562. * \brief KSLLI8 (SIMD 8-bit Saturating Shift Left Logical Immediate)
  4563. * \details
  4564. * **Type**: SIMD
  4565. *
  4566. * **Syntax**:\n
  4567. * ~~~
  4568. * KSLLI8 Rd, Rs1, imm3u
  4569. * ~~~
  4570. *
  4571. * **Purpose**:\n
  4572. * Do 8-bit elements logical left shift operations with saturation simultaneously. The shift
  4573. * amount is an immediate value.
  4574. *
  4575. * **Description**:\n
  4576. * The 8-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4577. * with zero and the shift amount is specified by the imm3u constant. Any shifted value greater than
  4578. * 2^7-1 is saturated to 2^7-1. Any shifted value smaller than -2^7 is saturated to -2^7. And the saturated
  4579. * results are written to Rd. If any saturation is performed, set OV bit to 1.
  4580. *
  4581. * **Operations**:\n
  4582. * ~~~
  4583. * sa = imm3u[2:0];
  4584. * if (sa != 0) {
  4585. * res[(7+sa):0] = Rs1.B[x] << sa;
  4586. * if (res > (2^7)-1) {
  4587. * res = 0x7f; OV = 1;
  4588. * } else if (res < -2^7) {
  4589. * res = 0x80; OV = 1;
  4590. * }
  4591. * Rd.B[x] = res[7:0];
  4592. * } else {
  4593. * Rd = Rs1;
  4594. * }
  4595. * for RV32: x=3...0,
  4596. * for RV64: x=7...0
  4597. * ~~~
  4598. *
  4599. * \param [in] a unsigned long type of value stored in a
  4600. * \param [in] b unsigned int type of value stored in b
  4601. * \return value stored in unsigned long type
  4602. */
  /* Statement-expression wrapper that emits one KSLLI8 instruction.
   * b is bound with the "K" immediate constraint, so it has to be an integer
   * constant expression (imm3u in the Syntax section above).
   * a is captured into __a BEFORE `result` is declared: with the opposite
   * order a call such as __RV_KSLLI8(result, 1) would read the macro's own
   * uninitialized `result` instead of the caller's variable. */
  #define __RV_KSLLI8(a, b) \
      ({ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long result; \
          __ASM volatile("kslli8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
          result; \
      })
  4610. /* ===== Inline Function End for 3.56. KSLLI8 ===== */
  4611. /* ===== Inline Function Start for 3.57. KSLL16 ===== */
  4612. /**
  4613. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4614. * \brief KSLL16 (SIMD 16-bit Saturating Shift Left Logical)
  4615. * \details
  4616. * **Type**: SIMD
  4617. *
  4618. * **Syntax**:\n
  4619. * ~~~
  4620. * KSLL16 Rd, Rs1, Rs2
  4621. * ~~~
  4622. *
  4623. * **Purpose**:\n
  4624. * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
  4625. * amount is a variable from a GPR.
  4626. *
  4627. * **Description**:\n
  4628. * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4629. * with zero and the shift amount is specified by the low-order 4-bits of the value in the Rs2 register.
  4630. * Any shifted value greater than 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is
  4631. * saturated to -2^15. And the saturated results are written to Rd. If any saturation is performed, set OV
  4632. * bit to 1.
  4633. *
  4634. * **Operations**:\n
  4635. * ~~~
  4636. * sa = Rs2[3:0];
  4637. * if (sa != 0) {
  4638. * res[(15+sa):0] = Rs1.H[x] << sa;
  4639. * if (res > (2^15)-1) {
  4640. * res = 0x7fff; OV = 1;
  4641. * } else if (res < -2^15) {
  4642. * res = 0x8000; OV = 1;
  4643. * }
  4644. * Rd.H[x] = res[15:0];
  4645. * } else {
  4646. * Rd = Rs1;
  4647. * }
  4648. * for RV32: x=1...0,
  4649. * for RV64: x=3...0
  4650. * ~~~
  4651. *
  4652. * \param [in] a unsigned long type of value stored in a
  4653. * \param [in] b unsigned int type of value stored in b
  4654. * \return value stored in unsigned long type
  4655. */
  4656. __STATIC_FORCEINLINE unsigned long __RV_KSLL16(unsigned long a, unsigned int b)
  4657. {
  4658. unsigned long result;
  4659. __ASM volatile("ksll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4660. return result;
  4661. }
  4662. /* ===== Inline Function End for 3.57. KSLL16 ===== */
  4663. /* ===== Inline Function Start for 3.58. KSLLI16 ===== */
  4664. /**
  4665. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4666. * \brief KSLLI16 (SIMD 16-bit Saturating Shift Left Logical Immediate)
  4667. * \details
  4668. * **Type**: SIMD
  4669. *
  4670. * **Syntax**:\n
  4671. * ~~~
  4672. * KSLLI16 Rd, Rs1, imm4u
  4673. * ~~~
  4674. *
  4675. * **Purpose**:\n
  4676. * Do 16-bit elements logical left shift operations with saturation simultaneously. The shift
  4677. * amount is an immediate value.
  4678. *
  4679. * **Description**:\n
  4680. * The 16-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  4681. * with zero and the shift amount is specified by the imm4u constant. Any shifted value greater than
  4682. * 2^15-1 is saturated to 2^15-1. Any shifted value smaller than -2^15 is saturated to -2^15. And the saturated
  4683. * results are written to Rd. If any saturation is performed, set OV bit to 1.
  4684. *
  4685. * **Operations**:\n
  4686. * ~~~
  4687. * sa = imm4u[3:0];
  4688. * if (sa != 0) {
  4689. * res[(15+sa):0] = Rs1.H[x] << sa;
  4690. * if (res > (2^15)-1) {
  4691. * res = 0x7fff; OV = 1;
  4692. * } else if (res < -2^15) {
  4693. * res = 0x8000; OV = 1;
  4694. * }
  4695. * Rd.H[x] = res[15:0];
  4696. * } else {
  4697. * Rd = Rs1;
  4698. * }
  4699. * for RV32: x=1...0,
  4700. * for RV64: x=3...0
  4701. * ~~~
  4702. *
  4703. * \param [in] a unsigned long type of value stored in a
  4704. * \param [in] b unsigned int type of value stored in b
  4705. * \return value stored in unsigned long type
  4706. */
  /* Statement-expression wrapper that emits one KSLLI16 instruction.
   * b is bound with the "K" immediate constraint, so it has to be an integer
   * constant expression (imm4u in the Syntax section above).
   * a is captured into __a BEFORE `result` is declared: with the opposite
   * order a call such as __RV_KSLLI16(result, 1) would read the macro's own
   * uninitialized `result` instead of the caller's variable. */
  #define __RV_KSLLI16(a, b) \
      ({ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long result; \
          __ASM volatile("kslli16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
          result; \
      })
  4714. /* ===== Inline Function End for 3.58. KSLLI16 ===== */
  4715. /* ===== Inline Function Start for 3.59.1. KSLRA8 ===== */
  4716. /**
  4717. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4718. * \brief KSLRA8 (SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  4719. * \details
  4720. * **Type**: SIMD
  4721. *
  4722. * **Syntax**:\n
  4723. * ~~~
  4724. * KSLRA8 Rd, Rs1, Rs2
  4725. * KSLRA8.u Rd, Rs1, Rs2
  4726. * ~~~
  4727. *
  4728. * **Purpose**:\n
  4729. * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4730. * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4731. * right shift.
  4732. *
  4733. * **Description**:\n
  4734. * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4735. * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
  4736. * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
  4737. * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
  4738. * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
  4739. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
  4740. * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4741. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4742. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
  4743. * this instruction.
  4744. *
  4745. * **Operations**:\n
  4746. * ~~~
  4747. * if (Rs2[3:0] < 0) {
  4748. * sa = -Rs2[3:0];
  4749. * sa = (sa == 8)? 7 : sa;
  4750. * if (`.u` form) {
  4751. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  4752. * Rd.B[x] = res[7:0];
  4753. * } else {
  4754. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  4755. * }
  4756. * } else {
  4757. * sa = Rs2[2:0];
  4758. * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
  4759. * if (res > (2^7)-1) {
  4760. * res[7:0] = 0x7f; OV = 1;
  4761. * } else if (res < -2^7) {
  4762. * res[7:0] = 0x80; OV = 1;
  4763. * }
  4764. * Rd.B[x] = res[7:0];
  4765. * }
  4766. * for RV32: x=3...0,
  4767. * for RV64: x=7...0
  4768. * ~~~
  4769. *
  4770. * \param [in] a unsigned long type of value stored in a
  4771. * \param [in] b int type of value stored in b
  4772. * \return value stored in unsigned long type
  4773. */
  4774. __STATIC_FORCEINLINE unsigned long __RV_KSLRA8(unsigned long a, int b)
  4775. {
  4776. unsigned long result;
  4777. __ASM volatile("kslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4778. return result;
  4779. }
  4780. /* ===== Inline Function End for 3.59.1. KSLRA8 ===== */
  4781. /* ===== Inline Function Start for 3.59.2. KSLRA8.u ===== */
  4782. /**
  4783. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  4784. * \brief KSLRA8.u (SIMD 8-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
  4785. * \details
  4786. * **Type**: SIMD
  4787. *
  4788. * **Syntax**:\n
  4789. * ~~~
  4790. * KSLRA8 Rd, Rs1, Rs2
  4791. * KSLRA8.u Rd, Rs1, Rs2
  4792. * ~~~
  4793. *
  4794. * **Purpose**:\n
  4795. * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4796. * Q7 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4797. * right shift.
  4798. *
  4799. * **Description**:\n
  4800. * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4801. * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
  4802. * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
  4803. * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
  4804. * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
  4805. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1]. For the `.u` form
  4806. * of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4807. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4808. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
  4809. * this instruction.
  4810. *
  4811. * **Operations**:\n
  4812. * ~~~
  4813. * if (Rs2[3:0] < 0) {
  4814. * sa = -Rs2[3:0];
  4815. * sa = (sa == 8)? 7 : sa;
  4816. * if (`.u` form) {
  4817. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  4818. * Rd.B[x] = res[7:0];
  4819. * } else {
  4820. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  4821. * }
  4822. * } else {
  4823. * sa = Rs2[2:0];
  4824. * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
  4825. * if (res > (2^7)-1) {
  4826. * res[7:0] = 0x7f; OV = 1;
  4827. * } else if (res < -2^7) {
  4828. * res[7:0] = 0x80; OV = 1;
  4829. * }
  4830. * Rd.B[x] = res[7:0];
  4831. * }
  4832. * for RV32: x=3...0,
  4833. * for RV64: x=7...0
  4834. * ~~~
  4835. *
  4836. * \param [in] a unsigned long type of value stored in a
  4837. * \param [in] b int type of value stored in b
  4838. * \return value stored in unsigned long type
  4839. */
  4840. __STATIC_FORCEINLINE unsigned long __RV_KSLRA8_U(unsigned long a, int b)
  4841. {
  4842. unsigned long result;
  4843. __ASM volatile("kslra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4844. return result;
  4845. }
  4846. /* ===== Inline Function End for 3.59.2. KSLRA8.u ===== */
  4847. /* ===== Inline Function Start for 3.60.1. KSLRA16 ===== */
  4848. /**
  4849. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4850. * \brief KSLRA16 (SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  4851. * \details
  4852. * **Type**: SIMD
  4853. *
  4854. * **Syntax**:\n
  4855. * ~~~
  4856. * KSLRA16 Rd, Rs1, Rs2
  4857. * KSLRA16.u Rd, Rs1, Rs2
  4858. * ~~~
  4859. *
  4860. * **Purpose**:\n
  4861. * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4862. * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4863. * right shift.
  4864. *
  4865. * **Description**:\n
  4866. * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4867. * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
  4868. * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
  4869. * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
  4870. * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
  4871. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
  4872. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4873. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4874. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
  4875. * this instruction.
  4876. *
  4877. * **Operations**:\n
  4878. * ~~~
  4879. * if (Rs2[4:0] < 0) {
  4880. * sa = -Rs2[4:0];
  4881. * sa = (sa == 16)? 15 : sa;
  4882. * if (`.u` form) {
  4883. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  4884. * Rd.H[x] = res[15:0];
  4885. * } else {
  4886. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  4887. * }
  4888. * } else {
  4889. * sa = Rs2[3:0];
  4890. * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
  4891. * if (res > (2^15)-1) {
  4892. * res[15:0] = 0x7fff; OV = 1;
  4893. * } else if (res < -2^15) {
  4894. * res[15:0] = 0x8000; OV = 1;
  4895. * }
  4896. * Rd.H[x] = res[15:0];
  4897. * }
  4898. * for RV32: x=1...0,
  4899. * for RV64: x=3...0
  4900. * ~~~
  4901. *
  4902. * \param [in] a unsigned long type of value stored in a
  4903. * \param [in] b int type of value stored in b
  4904. * \return value stored in unsigned long type
  4905. */
  4906. __STATIC_FORCEINLINE unsigned long __RV_KSLRA16(unsigned long a, int b)
  4907. {
  4908. unsigned long result;
  4909. __ASM volatile("kslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4910. return result;
  4911. }
  4912. /* ===== Inline Function End for 3.60.1. KSLRA16 ===== */
  4913. /* ===== Inline Function Start for 3.60.2. KSLRA16.u ===== */
  4914. /**
  4915. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  4916. * \brief KSLRA16.u (SIMD 16-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
  4917. * \details
  4918. * **Type**: SIMD
  4919. *
  4920. * **Syntax**:\n
  4921. * ~~~
  4922. * KSLRA16 Rd, Rs1, Rs2
  4923. * KSLRA16.u Rd, Rs1, Rs2
  4924. * ~~~
  4925. *
  4926. * **Purpose**:\n
  4927. * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  4928. * Q15 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  4929. * right shift.
  4930. *
  4931. * **Description**:\n
  4932. * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  4933. * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
  4934. * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
  4935. * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
  4936. * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
  4937. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1]. For the `.u`
  4938. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  4939. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  4940. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
  4941. * this instruction.
  4942. *
  4943. * **Operations**:\n
  4944. * ~~~
  4945. * if (Rs2[4:0] < 0) {
  4946. * sa = -Rs2[4:0];
  4947. * sa = (sa == 16)? 15 : sa;
  4948. * if (`.u` form) {
  4949. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  4950. * Rd.H[x] = res[15:0];
  4951. * } else {
  4952. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  4953. * }
  4954. * } else {
  4955. * sa = Rs2[3:0];
  4956. * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
  4957. * if (res > (2^15)-1) {
  4958. * res[15:0] = 0x7fff; OV = 1;
  4959. * } else if (res < -2^15) {
  4960. * res[15:0] = 0x8000; OV = 1;
  4961. * }
  4962. * Rd.H[x] = res[15:0];
  4963. * }
  4964. * for RV32: x=1...0,
  4965. * for RV64: x=3...0
  4966. * ~~~
  4967. *
  4968. * \param [in] a unsigned long type of value stored in a
  4969. * \param [in] b int type of value stored in b
  4970. * \return value stored in unsigned long type
  4971. */
  4972. __STATIC_FORCEINLINE unsigned long __RV_KSLRA16_U(unsigned long a, int b)
  4973. {
  4974. unsigned long result;
  4975. __ASM volatile("kslra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  4976. return result;
  4977. }
  4978. /* ===== Inline Function End for 3.60.2. KSLRA16.u ===== */
  4979. /* ===== Inline Function Start for 3.61. KSLRAW ===== */
  4980. /**
  4981. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  4982. * \brief KSLRAW (Shift Left Logical with Q31 Saturation or Shift Right Arithmetic)
  4983. * \details
  4984. * **Type**: DSP
  4985. *
  4986. * **Syntax**:\n
  4987. * ~~~
  4988. * KSLRAW Rd, Rs1, Rs2
  4989. * ~~~
  4990. *
  4991. * **Purpose**:\n
  4992. * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
  4993. * saturation for the left shift on a 32-bit data.
  4994. *
  4995. * **Description**:\n
  4996. * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
  4997. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  4998. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  4999. * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
  5000. * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. After the shift
  5001. * operation, the final result is bit-31 sign-extended and written to Rd. If any saturation happens, this
  5002. * instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this instruction.
  5003. *
  5004. * **Operations**:\n
  5005. * ~~~
  5006. * if (Rs2[5:0] < 0) {
  5007. * sa = -Rs2[5:0];
  5008. * sa = (sa == 32)? 31 : sa;
  5009. * res[31:0] = Rs1.W[0] >>(arith) sa;
  5010. * } else {
  5011. * sa = Rs2[5:0];
  5012. * tmp = Rs1.W[0] <<(logic) sa;
  5013. * if (tmp > (2^31)-1) {
  5014. * res[31:0] = (2^31)-1;
  5015. * OV = 1;
  5016. * } else if (tmp < -2^31) {
  5017. * res[31:0] = -2^31;
  5018. * OV = 1;
  5019. * } else {
  5020. * res[31:0] = tmp[31:0];
  5021. * }
  5022. * }
  5023. * Rd = res[31:0]; // RV32
  5024. * Rd = SE64(res[31:0]); // RV64
  5025. * ~~~
  5026. *
  5027. * \param [in] a int type of value stored in a
  5028. * \param [in] b int type of value stored in b
  5029. * \return value stored in long type
  5030. */
  5031. __STATIC_FORCEINLINE long __RV_KSLRAW(int a, int b)
  5032. {
  5033. long result;
  5034. __ASM volatile("kslraw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5035. return result;
  5036. }
  5037. /* ===== Inline Function End for 3.61. KSLRAW ===== */
  5038. /* ===== Inline Function Start for 3.62. KSLRAW.u ===== */
  5039. /**
  5040. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  5041. * \brief KSLRAW.u (Shift Left Logical with Q31 Saturation or Rounding Shift Right Arithmetic)
  5042. * \details
  5043. * **Type**: DSP
  5044. *
  5045. * **Syntax**:\n
  5046. * ~~~
  5047. * KSLRAW.u Rd, Rs1, Rs2
  5048. * ~~~
  5049. *
  5050. * **Purpose**:\n
  5051. * Perform a logical left (positive) or arithmetic right (negative) shift operation with Q31
  5052. * saturation for the left shift and a rounding up operation for the right shift on a 32-bit data.
  5053. *
  5054. * **Description**:\n
  5055. * The lower 32-bit content of Rs1 is left-shifted logically or right-shifted arithmetically
  5056. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  5057. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  5058. * absolute value of Rs2[5:0] clamped to the actual shift range of [0, 31].
  5059. * The left-shifted result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. The right-shifted
  5060. * result is added a 1 to the most significant discarded bit position for rounding effect. After the shift,
  5061. * saturation, or rounding, the final result is bit-31 sign-extended and written to Rd. If any saturation
  5062. * happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect the operation of this
  5063. * instruction.
  5064. *
  5065. * **Operations**:\n
  5066. * ~~~
  5067. * if (Rs2[5:0] < 0) {
  5068. * sa = -Rs2[5:0];
  5069. * sa = (sa == 32)? 31 : sa;
  5070. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  5071. * rst[31:0] = res[31:0];
  5072. * } else {
  5073. * sa = Rs2[5:0];
  5074. * tmp = Rs1.W[0] <<(logic) sa;
  5075. * if (tmp > (2^31)-1) {
  5076. * rst[31:0] = (2^31)-1;
  5077. * OV = 1;
  5078. * } else if (tmp < -2^31) {
  5079. * rst[31:0] = -2^31;
  5080. * OV = 1;
  5081. * } else {
  5082. * rst[31:0] = tmp[31:0];
  5083. * }
  5084. * }
  5085. * Rd = rst[31:0]; // RV32
  5086. * Rd = SE64(rst[31:0]); // RV64
  5087. * ~~~
  5088. *
  5089. * \param [in] a int type of value stored in a
  5090. * \param [in] b int type of value stored in b
  5091. * \return value stored in long type
  5092. */
  5093. __STATIC_FORCEINLINE long __RV_KSLRAW_U(int a, int b)
  5094. {
  5095. long result;
  5096. __ASM volatile("kslraw.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5097. return result;
  5098. }
  5099. /* ===== Inline Function End for 3.62. KSLRAW.u ===== */
  5100. /* ===== Inline Function Start for 3.63. KSTAS16 ===== */
  5101. /**
  5102. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  5103. * \brief KSTAS16 (SIMD 16-bit Signed Saturating Straight Addition & Subtraction)
  5104. * \details
  5105. * **Type**: SIMD
  5106. *
  5107. * **Syntax**:\n
  5108. * ~~~
  5109. * KSTAS16 Rd, Rs1, Rs2
  5110. * ~~~
  5111. *
  5112. * **Purpose**:\n
  5113. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element
  5114. * saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding
  5115. * positions in 32-bit chunks.
  5116. *
  5117. * **Description**:\n
  5118. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  5119. * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
  5120. * subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed
  5121. * integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number
  5122. * range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated
  5123. * results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for
  5124. * subtraction.
  5125. *
  5126. * **Operations**:\n
  5127. * ~~~
  5128. * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  5129. * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  5130. * for (res in [res1, res2]) {
  5131. * if (res > (2^15)-1) {
  5132. * res = (2^15)-1;
  5133. * OV = 1;
  5134. * } else if (res < -2^15) {
  5135. * res = -2^15;
  5136. * OV = 1;
  5137. * }
  5138. * }
  5139. * Rd.W[x][31:16] = res1;
  5140. * Rd.W[x][15:0] = res2;
  5141. * for RV32, x=0
  5142. * for RV64, x=1...0
  5143. * ~~~
  5144. *
  5145. * \param [in] a unsigned long type of value stored in a
  5146. * \param [in] b unsigned long type of value stored in b
  5147. * \return value stored in unsigned long type
  5148. */
  5149. __STATIC_FORCEINLINE unsigned long __RV_KSTAS16(unsigned long a, unsigned long b)
  5150. {
  5151. unsigned long result;
  5152. __ASM volatile("kstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5153. return result;
  5154. }
  5155. /* ===== Inline Function End for 3.63. KSTAS16 ===== */
  5156. /* ===== Inline Function Start for 3.64. KSTSA16 ===== */
  5157. /**
  5158. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  5159. * \brief KSTSA16 (SIMD 16-bit Signed Saturating Straight Subtraction & Addition)
  5160. * \details
  5161. * **Type**: SIMD
  5162. *
  5163. * **Syntax**:\n
  5164. * ~~~
  5165. * KSTSA16 Rd, Rs1, Rs2
  5166. * ~~~
  5167. *
  5168. * **Purpose**:\n
  5169. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element
  5170. * saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in
  5171. * 32-bit chunks.
  5172. *
  5173. * **Description**:\n
  5174. * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
  5175. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it
  5176. * adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer
  5177. * element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15
  5178. * <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  5179. * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for
  5180. * addition.
  5181. *
  5182. * **Operations**:\n
  5183. * ~~~
  5184. * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  5185. * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  5186. * for (res in [res1, res2]) {
  5187. * if (res > (2^15)-1) {
  5188. * res = (2^15)-1;
  5189. * OV = 1;
  5190. * } else if (res < -2^15) {
  5191. * res = -2^15;
  5192. * OV = 1;
  5193. * }
  5194. * }
  5195. * Rd.W[x][31:16] = res1;
  5196. * Rd.W[x][15:0] = res2;
  5197. * for RV32, x=0
  5198. * for RV64, x=1...0
  5199. * ~~~
  5200. *
  5201. * \param [in] a unsigned long type of value stored in a
  5202. * \param [in] b unsigned long type of value stored in b
  5203. * \return value stored in unsigned long type
  5204. */
  5205. __STATIC_FORCEINLINE unsigned long __RV_KSTSA16(unsigned long a, unsigned long b)
  5206. {
  5207. unsigned long result;
  5208. __ASM volatile("kstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5209. return result;
  5210. }
  5211. /* ===== Inline Function End for 3.64. KSTSA16 ===== */
  5212. /* ===== Inline Function Start for 3.65. KSUB8 ===== */
  5213. /**
  5214. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  5215. * \brief KSUB8 (SIMD 8-bit Signed Saturating Subtraction)
  5216. * \details
  5217. * **Type**: SIMD
  5218. *
  5219. * **Syntax**:\n
  5220. * ~~~
  5221. * KSUB8 Rd, Rs1, Rs2
  5222. * ~~~
  5223. *
  5224. * **Purpose**:\n
  5225. * Do 8-bit signed elements saturating subtractions simultaneously.
  5226. *
  5227. * **Description**:\n
  5228. * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
  5229. * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <=
  5230. * 2^7-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  5231. *
  5232. * **Operations**:\n
  5233. * ~~~
  5234. * res[x] = Rs1.B[x] - Rs2.B[x];
  5235. * if (res[x] > (2^7)-1) {
  5236. * res[x] = (2^7)-1;
  5237. * OV = 1;
  5238. * } else if (res[x] < -2^7) {
  5239. * res[x] = -2^7;
  5240. * OV = 1;
  5241. * }
  5242. * Rd.B[x] = res[x];
  5243. * for RV32: x=3...0,
  5244. * for RV64: x=7...0
  5245. * ~~~
  5246. *
  5247. * \param [in] a unsigned long type of value stored in a
  5248. * \param [in] b unsigned long type of value stored in b
  5249. * \return value stored in unsigned long type
  5250. */
  5251. __STATIC_FORCEINLINE unsigned long __RV_KSUB8(unsigned long a, unsigned long b)
  5252. {
  5253. unsigned long result;
  5254. __ASM volatile("ksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5255. return result;
  5256. }
  5257. /* ===== Inline Function End for 3.65. KSUB8 ===== */
  5258. /* ===== Inline Function Start for 3.66. KSUB16 ===== */
  5259. /**
  5260. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  5261. * \brief KSUB16 (SIMD 16-bit Signed Saturating Subtraction)
  5262. * \details
  5263. * **Type**: SIMD
  5264. *
  5265. * **Syntax**:\n
  5266. * ~~~
  5267. * KSUB16 Rd, Rs1, Rs2
  5268. * ~~~
  5269. *
  5270. * **Purpose**:\n
  5271. * Do 16-bit signed integer elements saturating subtractions simultaneously.
  5272. *
  5273. * **Description**:\n
  5274. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
  5275. * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
  5276. * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  5277. * Rd.
  5278. *
  5279. * **Operations**:\n
  5280. * ~~~
  5281. * res[x] = Rs1.H[x] - Rs2.H[x];
  5282. * if (res[x] > (2^15)-1) {
  5283. * res[x] = (2^15)-1;
  5284. * OV = 1;
  5285. * } else if (res[x] < -2^15) {
  5286. * res[x] = -2^15;
  5287. * OV = 1;
  5288. * }
  5289. * Rd.H[x] = res[x];
  5290. * for RV32: x=1...0,
  5291. * for RV64: x=3...0
  5292. * ~~~
  5293. *
  5294. * \param [in] a unsigned long type of value stored in a
  5295. * \param [in] b unsigned long type of value stored in b
  5296. * \return value stored in unsigned long type
  5297. */
  5298. __STATIC_FORCEINLINE unsigned long __RV_KSUB16(unsigned long a, unsigned long b)
  5299. {
  5300. unsigned long result;
  5301. __ASM volatile("ksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5302. return result;
  5303. }
  5304. /* ===== Inline Function End for 3.66. KSUB16 ===== */
  5305. /* ===== Inline Function Start for 3.67. KSUB64 ===== */
  5306. /**
  5307. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  5308. * \brief KSUB64 (64-bit Signed Saturating Subtraction)
  5309. * \details
  5310. * **Type**: DSP (64-bit Profile)
  5311. *
  5312. * **Syntax**:\n
  5313. * ~~~
  5314. * KSUB64 Rd, Rs1, Rs2
  5315. * ~~~
  5316. *
  5317. * **Purpose**:\n
  5318. * Perform a 64-bit signed integer subtraction. The result is saturated to the Q63 range.
  5319. *
  5320. * **RV32 Description**:\n
  5321. * This instruction subtracts the 64-bit signed integer of an even/odd pair of
  5322. * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
  5323. * specified by Rs1(4,1). If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is
  5324. * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
  5325. * pair of registers specified by Rd(4,1).
  5326. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  5327. * includes register 2d and 2d+1.
  5328. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  5329. * register of the pair contains the low 32-bit of the operand.
  5330. *
  5331. * **RV64 Description**:\n
  5332. * This instruction subtracts the 64-bit signed integer of Rs2 from the 64-bit signed
  5333. * integer of Rs1. If the 64-bit result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated
  5334. * to the range and the OV bit is set to 1. The saturated result is then written to Rd.
  5335. *
  5336. * **Operations**:\n
  5337. * ~~~
  5338. * RV32:
  5339. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  5340. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  5341. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  5342. * result = R[a_H].R[a_L] - R[b_H].R[b_L];
  5343. * if (result > (2^63)-1) {
  5344. * result = (2^63)-1; OV = 1;
  5345. * } else if (result < -2^63) {
  5346. * result = -2^63; OV = 1;
  5347. * }
  5348. * R[t_H].R[t_L] = result;
  5349. * RV64:
  5350. * result = Rs1 - Rs2;
  5351. * if (result > (2^63)-1) {
  5352. * result = (2^63)-1; OV = 1;
  5353. * } else if (result < -2^63) {
  5354. * result = -2^63; OV = 1;
  5355. * }
  5356. * Rd = result;
  5357. * ~~~
  5358. *
  5359. * \param [in] a long long type of value stored in a
  5360. * \param [in] b long long type of value stored in b
  5361. * \return value stored in long long type
  5362. */
  5363. __STATIC_FORCEINLINE long long __RV_KSUB64(long long a, long long b)
  5364. {
  5365. long long result;
  5366. __ASM volatile("ksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5367. return result;
  5368. }
  5369. /* ===== Inline Function End for 3.67. KSUB64 ===== */
  5370. /* ===== Inline Function Start for 3.68. KSUBH ===== */
  5371. /**
  5372. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  5373. * \brief KSUBH (Signed Subtraction with Q15 Saturation)
  5374. * \details
  5375. * **Type**: DSP
  5376. *
  5377. * **Syntax**:\n
  5378. * ~~~
  5379. * KSUBH Rd, Rs1, Rs2
  5380. * ~~~
  5381. *
  5382. * **Purpose**:\n
  5383. * Subtract the signed lower 32-bit content of two registers with Q15 saturation.
  5384. *
  5385. * **Description**:\n
  5386. * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
  5387. * content of Rs1. And the result is saturated to the 16-bit signed integer range of [-2^15, 2^15-1] and then
  5388. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  5389. *
  5390. * **Operations**:\n
  5391. * ~~~
  5392. * tmp = Rs1.W[0] - Rs2.W[0];
  5393. * if (tmp > (2^15)-1) {
  5394. * res = (2^15)-1;
  5395. * OV = 1;
  5396. * } else if (tmp < -2^15) {
  5397. * res = -2^15;
  5398. * OV = 1;
  5399. * } else {
  5400. * res = tmp;
  5401. * }
  5402. * Rd = SE(res[15:0]);
  5403. * ~~~
  5404. *
  5405. * \param [in] a int type of value stored in a
  5406. * \param [in] b int type of value stored in b
  5407. * \return value stored in long type
  5408. */
  5409. __STATIC_FORCEINLINE long __RV_KSUBH(int a, int b)
  5410. {
  5411. long result;
  5412. __ASM volatile("ksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5413. return result;
  5414. }
  5415. /* ===== Inline Function End for 3.68. KSUBH ===== */
  5416. /* ===== Inline Function Start for 3.69. KSUBW ===== */
  5417. /**
  5418. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  5419. * \brief KSUBW (Signed Subtraction with Q31 Saturation)
  5420. * \details
  5421. * **Type**: DSP
  5422. *
  5423. * **Syntax**:\n
  5424. * ~~~
  5425. * KSUBW Rd, Rs1, Rs2
  5426. * ~~~
  5427. *
  5428. * **Purpose**:\n
  5429. * Subtract the signed lower 32-bit content of two registers with Q31 saturation.
  5430. *
  5431. * **Description**:\n
  5432. * The signed lower 32-bit content of Rs2 is subtracted from the signed lower 32-bit
  5433. * content of Rs1. And the result is saturated to the 32-bit signed integer range of [-2^31, 2^31-1] and then
  5434. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  5435. *
  5436. * **Operations**:\n
  5437. * ~~~
  5438. * tmp = Rs1.W[0] - Rs2.W[0];
  5439. * if (tmp > (2^31)-1) {
  5440. * res = (2^31)-1;
  5441. * OV = 1;
  5442. * } else if (tmp < -2^31) {
  5443. * res = -2^31;
  5444. * OV = 1;
  5445. * } else {
  5446. * res = tmp;
  5447. * }
  5448. * Rd = res[31:0]; // RV32
  5449. * Rd = SE(res[31:0]); // RV64
  5450. * ~~~
  5451. *
  5452. * \param [in] a int type of value stored in a
  5453. * \param [in] b int type of value stored in b
  5454. * \return value stored in long type
  5455. */
  5456. __STATIC_FORCEINLINE long __RV_KSUBW(int a, int b)
  5457. {
  5458. long result;
  5459. __ASM volatile("ksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5460. return result;
  5461. }
  5462. /* ===== Inline Function End for 3.69. KSUBW ===== */
  5463. /* ===== Inline Function Start for 3.70.1. KWMMUL ===== */
  5464. /**
  5465. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  5466. * \brief KWMMUL (SIMD Saturating MSW Signed Multiply Word & Double)
  5467. * \details
  5468. * **Type**: SIMD
  5469. *
  5470. * **Syntax**:\n
  5471. * ~~~
  5472. * KWMMUL Rd, Rs1, Rs2
  5473. * KWMMUL.u Rd, Rs1, Rs2
  5474. * ~~~
  5475. *
  5476. * **Purpose**:\n
  5477. * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
  5478. * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
  5479. * rounds up the multiplication results from the most significant discarded bit.
  5480. *
  5481. * **Description**:\n
  5482. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  5483. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  5484. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  5485. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
  5486. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  5487. * 30 before the shift and saturation operations.
  5488. *
  5489. * **Operations**:\n
  5490. * ~~~
  5491. * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
  5492. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  5493. * if (`.u` form) {
  5494. * Round[x][33:0] = Mres[x][63:30] + 1;
  5495. * Rd.W[x] = Round[x][32:1];
  5496. * } else {
  5497. * Rd.W[x] = Mres[x][62:31];
  5498. * }
  5499. * } else {
  5500. * Rd.W[x] = 0x7fffffff;
  5501. * OV = 1;
  5502. * }
  5503. * for RV32: x=0
  5504. * for RV64: x=1...0
  5505. * ~~~
  5506. *
  5507. * \param [in] a long type of value stored in a
  5508. * \param [in] b long type of value stored in b
  5509. * \return value stored in long type
  5510. */
  5511. __STATIC_FORCEINLINE long __RV_KWMMUL(long a, long b)
  5512. {
  5513. long result;
  5514. __ASM volatile("kwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5515. return result;
  5516. }
  5517. /* ===== Inline Function End for 3.70.1. KWMMUL ===== */
  5518. /* ===== Inline Function Start for 3.70.2. KWMMUL.u ===== */
  5519. /**
  5520. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  5521. * \brief KWMMUL.u (SIMD Saturating MSW Signed Multiply Word & Double with Rounding)
  5522. * \details
  5523. * **Type**: SIMD
  5524. *
  5525. * **Syntax**:\n
  5526. * ~~~
  5527. * KWMMUL Rd, Rs1, Rs2
  5528. * KWMMUL.u Rd, Rs1, Rs2
  5529. * ~~~
  5530. *
  5531. * **Purpose**:\n
  5532. * Multiply the signed 32-bit integer elements of two registers, shift the results left 1-bit,
  5533. * saturate, and write the most significant 32-bit results to a register. The `.u` form additionally
  5534. * rounds up the multiplication results from the most significant discarded bit.
  5535. *
  5536. * **Description**:\n
  5537. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  5538. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  5539. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  5540. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The `.u`
  5541. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  5542. * 30 before the shift and saturation operations.
  5543. *
  5544. * **Operations**:\n
  5545. * ~~~
  5546. * if ((0x80000000 != Rs1.W[x]) | (0x80000000 != Rs2.W[x])) {
  5547. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  5548. * if (`.u` form) {
  5549. * Round[x][33:0] = Mres[x][63:30] + 1;
  5550. * Rd.W[x] = Round[x][32:1];
  5551. * } else {
  5552. * Rd.W[x] = Mres[x][62:31];
  5553. * }
  5554. * } else {
  5555. * Rd.W[x] = 0x7fffffff;
  5556. * OV = 1;
  5557. * }
  5558. * for RV32: x=0
  5559. * for RV64: x=1...0
  5560. * ~~~
  5561. *
  5562. * \param [in] a long type of value stored in a
  5563. * \param [in] b long type of value stored in b
  5564. * \return value stored in long type
  5565. */
  5566. __STATIC_FORCEINLINE long __RV_KWMMUL_U(long a, long b)
  5567. {
  5568. long result;
  5569. __ASM volatile("kwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5570. return result;
  5571. }
  5572. /* ===== Inline Function End for 3.70.2. KWMMUL.u ===== */
  5573. /* ===== Inline Function Start for 3.71. MADDR32 ===== */
  5574. /**
  5575. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  5576. * \brief MADDR32 (Multiply and Add to 32-Bit Word)
  5577. * \details
  5578. * **Type**: DSP
  5579. *
  5580. * **Syntax**:\n
  5581. * ~~~
  5582. * MADDR32 Rd, Rs1, Rs2
  5583. * ~~~
  5584. *
  5585. * **Purpose**:\n
  5586. * Multiply the 32-bit contents of two registers and add the lower 32-bit multiplication result
  5587. * to the 32-bit content of a destination register. Write the final result back to the destination register.
  5588. *
  5589. * **Description**:\n
  5590. * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2. It adds the
  5591. * lower 32-bit multiplication result to the lower 32-bit content of Rd and writes the final result (RV32)
  5592. * or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either signed or
  5593. * unsigned integers.
  5594. *
  5595. * **Operations**:\n
  5596. * ~~~
  5597. * RV32:
  5598. * Mresult = Rs1 * Rs2;
  5599. * Rd = Rd + Mresult.W[0];
  5600. * RV64:
  5601. * Mresult = Rs1.W[0] * Rs2.W[0];
  5602. * tres[31:0] = Rd.W[0] + Mresult.W[0];
  5603. * Rd = SE64(tres[31:0]);
  5604. * ~~~
  5605. *
  5606. * \param [in] t unsigned long type of value stored in t
  5607. * \param [in] a unsigned long type of value stored in a
  5608. * \param [in] b unsigned long type of value stored in b
  5609. * \return value stored in unsigned long type
  5610. */
  5611. __STATIC_FORCEINLINE unsigned long __RV_MADDR32(unsigned long t, unsigned long a, unsigned long b)
  5612. {
  5613. __ASM volatile("maddr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  5614. return t;
  5615. }
  5616. /* ===== Inline Function End for 3.71. MADDR32 ===== */
  5617. /* ===== Inline Function Start for 3.72. MAXW ===== */
  5618. /**
  5619. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5620. * \brief MAXW (32-bit Signed Word Maximum)
  5621. * \details
  5622. * **Type**: DSP
  5623. *
  5624. * **Syntax**:\n
  5625. * ~~~
  5626. * MAXW Rd, Rs1, Rs2
  5627. * ~~~
  5628. *
  5629. * **Purpose**:\n
  5630. * Get the larger value from the 32-bit contents of two general registers.
  5631. *
  5632. * **Description**:\n
  5633. * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
  5634. * larger value as the result, and writes the result to Rd.
  5635. *
  5636. * **Operations**:\n
  5637. * ~~~
  5638. * if (Rs1.W[0] >= Rs2.W[0]) {
  5639. * Rd = SE(Rs1.W[0]);
  5640. * } else {
  5641. * Rd = SE(Rs2.W[0]);
  5642. * }
  5643. * ~~~
  5644. *
  5645. * \param [in] a int type of value stored in a
  5646. * \param [in] b int type of value stored in b
  5647. * \return value stored in long type
  5648. */
  5649. __STATIC_FORCEINLINE long __RV_MAXW(int a, int b)
  5650. {
  5651. long result;
  5652. __ASM volatile("maxw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5653. return result;
  5654. }
  5655. /* ===== Inline Function End for 3.72. MAXW ===== */
  5656. /* ===== Inline Function Start for 3.73. MINW ===== */
  5657. /**
  5658. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5659. * \brief MINW (32-bit Signed Word Minimum)
  5660. * \details
  5661. * **Type**: DSP
  5662. *
  5663. * **Syntax**:\n
  5664. * ~~~
  5665. * MINW Rd, Rs1, Rs2
  5666. * ~~~
  5667. *
  5668. * **Purpose**:\n
  5669. * Get the smaller value from the 32-bit contents of two general registers.
  5670. *
  5671. * **Description**:\n
  5672. * This instruction compares two signed 32-bit integers stored in Rs1 and Rs2, picks the
  5673. * smaller value as the result, and writes the result to Rd.
  5674. *
  5675. * **Operations**:\n
  5676. * ~~~
  5677. * if (Rs1.W[0] >= Rs2.W[0]) { Rd = SE(Rs2.W[0]); } else { Rd = SE(Rs1.W[0]); }
  5678. * ~~~
  5679. *
  5680. * \param [in] a int type of value stored in a
  5681. * \param [in] b int type of value stored in b
  5682. * \return value stored in long type
  5683. */
  5684. __STATIC_FORCEINLINE long __RV_MINW(int a, int b)
  5685. {
  5686. long result;
  5687. __ASM volatile("minw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5688. return result;
  5689. }
  5690. /* ===== Inline Function End for 3.73. MINW ===== */
  5691. /* ===== Inline Function Start for 3.74. MSUBR32 ===== */
  5692. /**
  5693. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  5694. * \brief MSUBR32 (Multiply and Subtract from 32-Bit Word)
  5695. * \details
  5696. * **Type**: DSP
  5697. *
  5698. * **Syntax**:\n
  5699. * ~~~
  5700. * MSUBR32 Rd, Rs1, Rs2
  5701. * ~~~
  5702. *
  5703. * **Purpose**:\n
  5704. * Multiply the 32-bit contents of two registers and subtract the lower 32-bit multiplication
  5705. * result from the 32-bit content of a destination register. Write the final result back to the destination
  5706. * register.
  5707. *
  5708. * **Description**:\n
  5709. * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2, subtracts
  5710. * the lower 32-bit multiplication result from the lower 32-bit content of Rd, then writes the final
  5711. * result (RV32) or sign-extended result (RV64) back to Rd. The contents of Rs1 and Rs2 can be either
  5712. * signed or unsigned integers.
  5713. *
  5714. * **Operations**:\n
  5715. * ~~~
  5716. * RV32:
  5717. * Mresult = Rs1 * Rs2;
  5718. * Rd = Rd - Mresult.W[0];
  5719. * RV64:
  5720. * Mresult = Rs1.W[0] * Rs2.W[0];
  5721. * tres[31:0] = Rd.W[0] - Mresult.W[0];
  5722. * Rd = SE64(tres[31:0]);
  5723. * ~~~
  5724. *
  5725. * \param [in] t unsigned long type of value stored in t
  5726. * \param [in] a unsigned long type of value stored in a
  5727. * \param [in] b unsigned long type of value stored in b
  5728. * \return value stored in unsigned long type
  5729. */
  5730. __STATIC_FORCEINLINE unsigned long __RV_MSUBR32(unsigned long t, unsigned long a, unsigned long b)
  5731. {
  5732. __ASM volatile("msubr32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  5733. return t;
  5734. }
  5735. /* ===== Inline Function End for 3.74. MSUBR32 ===== */
  5736. /* ===== Inline Function Start for 3.75. MULR64 ===== */
  5737. /**
  5738. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5739. * \brief MULR64 (Multiply Word Unsigned to 64-bit Data)
  5740. * \details
  5741. * **Type**: DSP
  5742. *
  5743. * **Syntax**:\n
  5744. * ~~~
  5745. * MULR64 Rd, Rs1, Rs2
  5746. * ~~~
  5747. *
  5748. * **Purpose**:\n
  5749. * Multiply the 32-bit unsigned integer contents of two registers and write the 64-bit result.
  5750. *
  5751. * **RV32 Description**:\n
  5752. * This instruction multiplies the 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
  5753. * multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d determines the
  5754. * even/odd pair group of the two registers. Specifically, the register pair includes register 2d and
  5755. * 2d+1.
  5756. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  5757. * of the pair contains the low 32-bit of the result.
  5758. * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
  5759. *
  5760. * **RV64 Description**:\n
  5761. * This instruction multiplies the lower 32-bit content of Rs1 with that of Rs2 and writes the 64-bit
  5762. * multiplication result to Rd.
  5763. * The lower 32-bit contents of Rs1 and Rs2 are treated as unsigned integers.
  5764. *
  5765. * **Operations**:\n
  5766. * ~~~
  5767. * RV32:
  5768. * Mresult = CONCAT(1`b0,Rs1) u* CONCAT(1`b0,Rs2);
  5769. * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
  5770. * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
  5771. * RV64:
  5772. * Mresult = CONCAT(1`b0,Rs1.W[0]) u* CONCAT(1`b0,Rs2.W[0]);
  5773. * Rd = Mresult[63:0];
  5774. * ~~~
  5775. *
  5776. * \param [in] a unsigned long type of value stored in a
  5777. * \param [in] b unsigned long type of value stored in b
  5778. * \return value stored in unsigned long long type
  5779. */
  5780. __STATIC_FORCEINLINE unsigned long long __RV_MULR64(unsigned long a, unsigned long b)
  5781. {
  5782. unsigned long long result;
  5783. __ASM volatile("mulr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5784. return result;
  5785. }
  5786. /* ===== Inline Function End for 3.75. MULR64 ===== */
  5787. /* ===== Inline Function Start for 3.76. MULSR64 ===== */
  5788. /**
  5789. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  5790. * \brief MULSR64 (Multiply Word Signed to 64-bit Data)
  5791. * \details
  5792. * **Type**: DSP
  5793. *
  5794. * **Syntax**:\n
  5795. * ~~~
  5796. * MULSR64 Rd, Rs1, Rs2
  5797. * ~~~
  5798. *
  5799. * **Purpose**:\n
  5800. * Multiply the 32-bit signed integer contents of two registers and write the 64-bit result.
  5801. *
  5802. * **RV32 Description**:\n
  5803. * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
  5804. * writes the 64-bit multiplication result to an even/odd pair of registers containing Rd. Rd(4,1) index d
  5805. * determines the even/odd pair group of the two registers. Specifically, the register pair includes
  5806. * register 2d and 2d+1.
  5807. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  5808. * of the pair contains the low 32-bit of the result.
  5809. * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  5810. *
  5811. * **RV64 Description**:\n
  5812. * This instruction multiplies the lower 32-bit content of Rs1 with the lower 32-bit content of Rs2 and
  5813. * writes the 64-bit multiplication result to Rd.
  5814. * The lower 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  5815. *
  5816. * **Operations**:\n
  5817. * ~~~
  5818. * RV32:
  5819. * Mresult = Rs1 s* Rs2;
  5820. * R[Rd(4,1).1(0)][31:0] = Mresult[63:32];
  5821. * R[Rd(4,1).0(0)][31:0] = Mresult[31:0];
  5822. * RV64:
  5823. * Mresult = Rs1.W[0] s* Rs2.W[0];
  5824. * Rd = Mresult[63:0];
  5825. * ~~~
  5826. *
  5827. * \param [in] a long type of value stored in a
  5828. * \param [in] b long type of value stored in b
  5829. * \return value stored in long long type
  5830. */
  5831. __STATIC_FORCEINLINE long long __RV_MULSR64(long a, long b)
  5832. {
  5833. long long result;
  5834. __ASM volatile("mulsr64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5835. return result;
  5836. }
  5837. /* ===== Inline Function End for 3.76. MULSR64 ===== */
  5838. /* ===== Inline Function Start for 3.77. PBSAD ===== */
  5839. /**
  5840. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  5841. * \brief PBSAD (Parallel Byte Sum of Absolute Difference)
  5842. * \details
  5843. * **Type**: DSP
  5844. *
  5845. * **Syntax**:\n
  5846. * ~~~
  5847. * PBSAD Rd, Rs1, Rs2
  5848. * ~~~
  5849. *
  5850. * **Purpose**:\n
  5851. * Calculate the sum of absolute difference of unsigned 8-bit data elements.
  5852. *
  5853. * **Description**:\n
  5854. * This instruction subtracts the unsigned 8-bit elements of Rs2 from those of Rs1. Then
  5855. * it adds the absolute value of each difference together and writes the result to Rd.
  5856. *
  5857. * **Operations**:\n
  5858. * ~~~
  5859. * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
  5860. * Rd = SUM(absdiff[x]);
  5861. * for RV32: x=3...0,
  5862. * for RV64: x=7...0
  5863. * ~~~
  5864. *
  5865. * \param [in] a unsigned long type of value stored in a
  5866. * \param [in] b unsigned long type of value stored in b
  5867. * \return value stored in unsigned long type
  5868. */
  5869. __STATIC_FORCEINLINE unsigned long __RV_PBSAD(unsigned long a, unsigned long b)
  5870. {
  5871. unsigned long result;
  5872. __ASM volatile("pbsad %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5873. return result;
  5874. }
  5875. /* ===== Inline Function End for 3.77. PBSAD ===== */
  5876. /* ===== Inline Function Start for 3.78. PBSADA ===== */
  5877. /**
  5878. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  5879. * \brief PBSADA (Parallel Byte Sum of Absolute Difference Accum)
  5880. * \details
  5881. * **Type**: DSP
  5882. *
  5883. * **Syntax**:\n
  5884. * ~~~
  5885. * PBSADA Rd, Rs1, Rs2
  5886. * ~~~
  5887. *
  5888. * **Purpose**:\n
  5889. * Calculate the sum of absolute difference of four unsigned 8-bit data elements and
  5890. * accumulate it into a register.
  5891. *
  5892. * **Description**:\n
  5893. * This instruction subtracts the unsigned 8-bit elements of Rs2 from those of Rs1. It
  5894. * then adds the absolute value of each difference together along with the content of Rd and writes the
  5895. * accumulated result back to Rd.
  5896. *
  5897. * **Operations**:\n
  5898. * ~~~
  5899. * absdiff[x] = ABS(Rs1.B[x] - Rs2.B[x]);
  5900. * Rd = Rd + SUM(absdiff[x]);
  5901. * for RV32: x=3...0,
  5902. * for RV64: x=7...0
  5903. * ~~~
  5904. *
  5905. * \param [in] t unsigned long type of value stored in t
  5906. * \param [in] a unsigned long type of value stored in a
  5907. * \param [in] b unsigned long type of value stored in b
  5908. * \return value stored in unsigned long type
  5909. */
  5910. __STATIC_FORCEINLINE unsigned long __RV_PBSADA(unsigned long t, unsigned long a, unsigned long b)
  5911. {
  5912. __ASM volatile("pbsada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  5913. return t;
  5914. }
  5915. /* ===== Inline Function End for 3.78. PBSADA ===== */
  5916. /* ===== Inline Function Start for 3.79.1. PKBB16 ===== */
  5917. /**
  5918. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  5919. * \brief PKBB16 (Pack Two 16-bit Data from Both Bottom Half)
  5920. * \details
  5921. * **Type**: DSP
  5922. *
  5923. * **Syntax**:\n
  5924. * ~~~
  5925. * PKBB16 Rd, Rs1, Rs2
  5926. * PKBT16 Rd, Rs1, Rs2
  5927. * PKTT16 Rd, Rs1, Rs2
  5928. * PKTB16 Rd, Rs1, Rs2
  5929. * ~~~
  5930. *
  5931. * **Purpose**:\n
  5932. * Pack 16-bit data from 32-bit chunks in two registers.
  5933. * * PKBB16: bottom.bottom
  5934. * * PKBT16: bottom.top
  5935. * * PKTT16: top.top
  5936. * * PKTB16: top.bottom
  5937. *
  5938. * **Description**:\n
  5939. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
  5940. * Rd.W[x] [15:0].
  5941. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5942. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5943. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  5944. *
  5945. * **Operations**:\n
  5946. * ~~~
  5947. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  5948. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  5949. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  5950. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  5951. * for RV32: x=0,
  5952. * for RV64: x=1...0
  5953. * ~~~
  5954. *
  5955. * \param [in] a unsigned long type of value stored in a
  5956. * \param [in] b unsigned long type of value stored in b
  5957. * \return value stored in unsigned long type
  5958. */
  5959. __STATIC_FORCEINLINE unsigned long __RV_PKBB16(unsigned long a, unsigned long b)
  5960. {
  5961. unsigned long result;
  5962. __ASM volatile("pkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  5963. return result;
  5964. }
  5965. /* ===== Inline Function End for 3.79.1. PKBB16 ===== */
  5966. /* ===== Inline Function Start for 3.79.2. PKBT16 ===== */
  5967. /**
  5968. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  5969. * \brief PKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
  5970. * \details
  5971. * **Type**: DSP
  5972. *
  5973. * **Syntax**:\n
  5974. * ~~~
  5975. * PKBB16 Rd, Rs1, Rs2
  5976. * PKBT16 Rd, Rs1, Rs2
  5977. * PKTT16 Rd, Rs1, Rs2
  5978. * PKTB16 Rd, Rs1, Rs2
  5979. * ~~~
  5980. *
  5981. * **Purpose**:\n
  5982. * Pack 16-bit data from 32-bit chunks in two registers.
  5983. * * PKBB16: bottom.bottom
  5984. * * PKBT16: bottom.top
  5985. * * PKTT16: top.top
  5986. * * PKTB16: top.bottom
  5987. *
  5988. * **Description**:\n
  5989. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
  5990. * Rd.W[x] [15:0].
  5991. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5992. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  5993. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  5994. *
  5995. * **Operations**:\n
  5996. * ~~~
  5997. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  5998. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  5999. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  6000. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  6001. * for RV32: x=0,
  6002. * for RV64: x=1...0
  6003. * ~~~
  6004. *
  6005. * \param [in] a unsigned long type of value stored in a
  6006. * \param [in] b unsigned long type of value stored in b
  6007. * \return value stored in unsigned long type
  6008. */
  6009. __STATIC_FORCEINLINE unsigned long __RV_PKBT16(unsigned long a, unsigned long b)
  6010. {
  6011. unsigned long result;
  6012. __ASM volatile("pkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6013. return result;
  6014. }
  6015. /* ===== Inline Function End for 3.79.2. PKBT16 ===== */
  6016. /* ===== Inline Function Start for 3.79.3. PKTT16 ===== */
  6017. /**
  6018. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  6019. * \brief PKTT16 (Pack Two 16-bit Data from Both Top Half)
  6020. * \details
  6021. * **Type**: DSP
  6022. *
  6023. * **Syntax**:\n
  6024. * ~~~
  6025. * PKBB16 Rd, Rs1, Rs2
  6026. * PKBT16 Rd, Rs1, Rs2
  6027. * PKTT16 Rd, Rs1, Rs2
  6028. * PKTB16 Rd, Rs1, Rs2
  6029. * ~~~
  6030. *
  6031. * **Purpose**:\n
  6032. * Pack 16-bit data from 32-bit chunks in two registers.
  6033. * * PKBB16: bottom.bottom
  6034. * * PKBT16: bottom.top
  6035. * * PKTT16: top.top
  6036. * * PKTB16: top.bottom
  6037. *
  6038. * **Description**:\n
  6039. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to
  6040. * Rd.W[x] [15:0].
  6041. * (PKBT16) moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  6042. * (PKTT16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  6043. * (PKTB16) moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  6044. *
  6045. * **Operations**:\n
  6046. * ~~~
  6047. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  6048. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  6049. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  6050. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  6051. * for RV32: x=0,
  6052. * for RV64: x=1...0
  6053. * ~~~
  6054. *
  6055. * \param [in] a unsigned long type of value stored in a
  6056. * \param [in] b unsigned long type of value stored in b
  6057. * \return value stored in unsigned long type
  6058. */
  6059. __STATIC_FORCEINLINE unsigned long __RV_PKTT16(unsigned long a, unsigned long b)
  6060. {
  6061. unsigned long result;
  6062. __ASM volatile("pktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6063. return result;
  6064. }
  6065. /* ===== Inline Function End for 3.79.3. PKTT16 ===== */
  6066. /* ===== Inline Function Start for 3.79.4. PKTB16 ===== */
  6067. /**
  6068. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_PACK
  6069. * \brief PKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
  6070. * \details
  6071. * **Type**: DSP
  6072. *
  6073. * **Syntax**:\n
  6074. * ~~~
  6075. * PKBB16 Rd, Rs1, Rs2
  6076. * PKBT16 Rd, Rs1, Rs2
  6077. * PKTT16 Rd, Rs1, Rs2
  6078. * PKTB16 Rd, Rs1, Rs2
  6079. * ~~~
  6080. *
  6081. * **Purpose**:\n
  6082. * Pack 16-bit data from 32-bit chunks in two registers.
  6083. * * PKBB16: bottom.bottom
  6084. * * PKBT16: bottom.top
  6085. * * PKTT16: top.top
  6086. * * PKTB16: top.bottom
  6087. *
  6088. * **Description**:\n
  6089. * (PKBB16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x][15:0] to
  6090. * Rd.W[x][15:0].
  6091. * (PKBT16) moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x][31:16] to Rd.W[x][15:0].
  6092. * (PKTT16) moves Rs1.W[x][31:16] to Rd.W[x][31:16] and moves Rs2.W[x][31:16] to Rd.W[x][15:0].
  6093. * (PKTB16) moves Rs1.W[x][31:16] to Rd.W[x][31:16] and moves Rs2.W[x][15:0] to Rd.W[x][15:0].
  6094. *
  6095. * **Operations**:\n
  6096. * ~~~
  6097. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]); // PKBB16
  6098. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]); // PKBT16
  6099. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]); // PKTB16
  6100. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]); // PKTT16
  6101. * for RV32: x=0,
  6102. * for RV64: x=1...0
  6103. * ~~~
  6104. *
  6105. * \param [in] a unsigned long type of value stored in a
  6106. * \param [in] b unsigned long type of value stored in b
  6107. * \return value stored in unsigned long type
  6108. */
  6109. __STATIC_FORCEINLINE unsigned long __RV_PKTB16(unsigned long a, unsigned long b)
  6110. {
  6111. unsigned long result;
  6112. __ASM volatile("pktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6113. return result;
  6114. }
  6115. /* ===== Inline Function End for 3.79.4. PKTB16 ===== */
  6116. /* ===== Inline Function Start for 3.80. RADD8 ===== */
  6117. /**
  6118. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  6119. * \brief RADD8 (SIMD 8-bit Signed Halving Addition)
  6120. * \details
  6121. * **Type**: SIMD
  6122. *
  6123. * **Syntax**:\n
  6124. * ~~~
  6125. * RADD8 Rd, Rs1, Rs2
  6126. * ~~~
  6127. *
  6128. * **Purpose**:\n
  6129. * Do 8-bit signed integer element additions simultaneously. The element results are halved
  6130. * to avoid overflow or saturation.
  6131. *
  6132. * **Description**:\n
  6133. * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
  6134. * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
  6135. * Rd.
  6136. *
  6137. * **Examples**:\n
  6138. * ~~~
  6139. * * Rs1 = 0x7F, Rs2 = 0x7F, Rd = 0x7F
  6140. * * Rs1 = 0x80, Rs2 = 0x80, Rd = 0x80
  6141. * * Rs1 = 0x40, Rs2 = 0x80, Rd = 0xE0
  6142. * ~~~
  6143. *
  6144. * **Operations**:\n
  6145. * ~~~
  6146. * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) s>> 1; for RV32: x=3...0, for RV64: x=7...0
  6147. * ~~~
  6148. *
  6149. * \param [in] a unsigned long type of value stored in a
  6150. * \param [in] b unsigned long type of value stored in b
  6151. * \return value stored in unsigned long type
  6152. */
  6153. __STATIC_FORCEINLINE unsigned long __RV_RADD8(unsigned long a, unsigned long b)
  6154. {
  6155. unsigned long result;
  6156. __ASM volatile("radd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6157. return result;
  6158. }
  6159. /* ===== Inline Function End for 3.80. RADD8 ===== */
  6160. /* ===== Inline Function Start for 3.81. RADD16 ===== */
  6161. /**
  6162. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6163. * \brief RADD16 (SIMD 16-bit Signed Halving Addition)
  6164. * \details
  6165. * **Type**: SIMD
  6166. *
  6167. * **Syntax**:\n
  6168. * ~~~
  6169. * RADD16 Rd, Rs1, Rs2
  6170. * ~~~
  6171. *
  6172. * **Purpose**:\n
  6173. * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid
  6174. * overflow or saturation.
  6175. *
  6176. * **Description**:\n
  6177. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
  6178. * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
  6179. * Rd.
  6180. *
  6181. * **Examples**:\n
  6182. * ~~~
  6183. * * Rs1 = 0x7FFF, Rs2 = 0x7FFF, Rd = 0x7FFF
  6184. * * Rs1 = 0x8000, Rs2 = 0x8000, Rd = 0x8000
  6185. * * Rs1 = 0x4000, Rs2 = 0x8000, Rd = 0xE000
  6186. * ~~~
  6187. *
  6188. * **Operations**:\n
  6189. * ~~~
  6190. * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) s>> 1; for RV32: x=1...0, for RV64: x=3...0
  6191. * ~~~
  6192. *
  6193. * \param [in] a unsigned long type of value stored in a
  6194. * \param [in] b unsigned long type of value stored in b
  6195. * \return value stored in unsigned long type
  6196. */
  6197. __STATIC_FORCEINLINE unsigned long __RV_RADD16(unsigned long a, unsigned long b)
  6198. {
  6199. unsigned long result;
  6200. __ASM volatile("radd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6201. return result;
  6202. }
  6203. /* ===== Inline Function End for 3.81. RADD16 ===== */
  6204. /* ===== Inline Function Start for 3.82. RADD64 ===== */
  6205. /**
  6206. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  6207. * \brief RADD64 (64-bit Signed Halving Addition)
  6208. * \details
  6209. * **Type**: DSP (64-bit Profile)
  6210. *
  6211. * **Syntax**:\n
  6212. * ~~~
  6213. * RADD64 Rd, Rs1, Rs2
  6214. * ~~~
  6215. *
  6216. * **Purpose**:\n
  6217. * Add two 64-bit signed integers. The result is halved to avoid overflow or saturation.
  6218. *
  6219. * **RV32 Description**:\n
  6220. * This instruction adds the 64-bit signed integer of an even/odd pair of registers
  6221. * specified by Rs1(4,1) with the 64-bit signed integer of an even/odd pair of registers specified by
  6222. * Rs2(4,1). The 64-bit addition result is first arithmetically right-shifted by 1 bit and then written to an
  6223. * even/odd pair of registers specified by Rd(4,1).
  6224. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  6225. * pair includes register 2d and 2d+1.
  6226. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  6227. * of the pair contains the low 32-bit of the result.
  6228. *
  6229. * **RV64 Description**:\n
  6230. * This instruction adds the 64-bit signed integer in Rs1 with the 64-bit signed
  6231. * integer in Rs2. The 64-bit addition result is first arithmetically right-shifted by 1 bit and then
  6232. * written to Rd.
  6233. *
  6234. * **Operations**:\n
  6235. * ~~~
  6236. * RV32:
  6237. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  6238. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  6239. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  6240. * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) s>> 1;
  6241. * RV64:
  6242. * Rd = (Rs1 + Rs2) s>> 1;
  6243. * ~~~
  6244. *
  6245. * \param [in] a long long type of value stored in a
  6246. * \param [in] b long long type of value stored in b
  6247. * \return value stored in long long type
  6248. */
  6249. __STATIC_FORCEINLINE long long __RV_RADD64(long long a, long long b)
  6250. {
  6251. long long result;
  6252. __ASM volatile("radd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6253. return result;
  6254. }
  6255. /* ===== Inline Function End for 3.82. RADD64 ===== */
  6256. /* ===== Inline Function Start for 3.83. RADDW ===== */
  6257. /**
  6258. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  6259. * \brief RADDW (32-bit Signed Halving Addition)
  6260. * \details
  6261. * **Type**: DSP
  6262. *
  6263. * **Syntax**:\n
  6264. * ~~~
  6265. * RADDW Rd, Rs1, Rs2
  6266. * ~~~
  6267. *
  6268. * **Purpose**:\n
  6269. * Add 32-bit signed integers and the results are halved to avoid overflow or saturation.
  6270. *
  6271. * **Description**:\n
  6272. * This instruction adds the first 32-bit signed integer in Rs1 with the first 32-bit signed
  6273. * integer in Rs2. The result is first arithmetically right-shifted by 1 bit and then sign-extended and
  6274. * written to Rd.
  6275. *
  6276. * **Examples**:\n
  6277. * ~~~
  6278. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF, Rd = 0x7FFFFFFF
  6279. * * Rs1 = 0x80000000, Rs2 = 0x80000000, Rd = 0x80000000
  6280. * * Rs1 = 0x40000000, Rs2 = 0x80000000, Rd = 0xE0000000
  6281. * ~~~
  6282. *
  6283. * **Operations**:\n
  6284. * ~~~
  6285. * RV32:
  6286. * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
  6287. * RV64:
  6288. * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) s>> 1;
  6289. * Rd[63:0] = SE(resw[31:0]);
  6290. * ~~~
  6291. *
  6292. * \param [in] a int type of value stored in a
  6293. * \param [in] b int type of value stored in b
  6294. * \return value stored in long type
  6295. */
  6296. __STATIC_FORCEINLINE long __RV_RADDW(int a, int b)
  6297. {
  6298. long result;
  6299. __ASM volatile("raddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6300. return result;
  6301. }
  6302. /* ===== Inline Function End for 3.83. RADDW ===== */
  6303. /* ===== Inline Function Start for 3.84. RCRAS16 ===== */
  6304. /**
  6305. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6306. * \brief RCRAS16 (SIMD 16-bit Signed Halving Cross Addition & Subtraction)
  6307. * \details
  6308. * **Type**: SIMD
  6309. *
  6310. * **Syntax**:\n
  6311. * ~~~
  6312. * RCRAS16 Rd, Rs1, Rs2
  6313. * ~~~
  6314. *
  6315. * **Purpose**:\n
  6316. * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
  6317. * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
  6318. * are halved to avoid overflow or saturation.
  6319. *
  6320. * **Description**:\n
  6321. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  6322. * Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit
  6323. * signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
  6324. * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
  6325. * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6326. *
  6327. * **Examples**:\n
  6328. * ~~~
  6329. * Please see `RADD16` and `RSUB16` instructions.
  6330. * ~~~
  6331. *
  6332. * **Operations**:\n
  6333. * ~~~
  6334. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
  6335. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
  6336. * for RV32, x=0
  6337. * for RV64, x=1...0
  6338. * ~~~
  6339. *
  6340. * \param [in] a unsigned long type of value stored in a
  6341. * \param [in] b unsigned long type of value stored in b
  6342. * \return value stored in unsigned long type
  6343. */
  6344. __STATIC_FORCEINLINE unsigned long __RV_RCRAS16(unsigned long a, unsigned long b)
  6345. {
  6346. unsigned long result;
  6347. __ASM volatile("rcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6348. return result;
  6349. }
  6350. /* ===== Inline Function End for 3.84. RCRAS16 ===== */
  6351. /* ===== Inline Function Start for 3.85. RCRSA16 ===== */
  6352. /**
  6353. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6354. * \brief RCRSA16 (SIMD 16-bit Signed Halving Cross Subtraction & Addition)
  6355. * \details
  6356. * **Type**: SIMD
  6357. *
  6358. * **Syntax**:\n
  6359. * ~~~
  6360. * RCRSA16 Rd, Rs1, Rs2
  6361. * ~~~
  6362. *
  6363. * **Purpose**:\n
  6364. * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
  6365. * a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results
  6366. * are halved to avoid overflow or saturation.
  6367. *
  6368. * **Description**:\n
  6369. * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks
  6370. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
  6371. * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
  6372. * [31:16] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and
  6373. * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6374. *
  6375. * **Examples**:\n
  6376. * ~~~
  6377. * Please see `RADD16` and `RSUB16` instructions.
  6378. * ~~~
  6379. *
  6380. * **Operations**:\n
  6381. * ~~~
  6382. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
  6383. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
  6384. * for RV32, x=0
  6385. * for RV64, x=1...0
  6386. * ~~~
  6387. *
  6388. * \param [in] a unsigned long type of value stored in a
  6389. * \param [in] b unsigned long type of value stored in b
  6390. * \return value stored in unsigned long type
  6391. */
  6392. __STATIC_FORCEINLINE unsigned long __RV_RCRSA16(unsigned long a, unsigned long b)
  6393. {
  6394. unsigned long result;
  6395. __ASM volatile("rcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6396. return result;
  6397. }
  6398. /* ===== Inline Function End for 3.85. RCRSA16 ===== */
  6399. /* ===== Inline Function Start for 3.86. RDOV ===== */
  6400. /**
  6401. * \ingroup NMSIS_Core_DSP_Intrinsic_OV_FLAG_SC
  6402. * \brief RDOV (Read OV flag)
  6403. * \details
  6404. * **Type**: DSP
  6405. *
  6406. * **Syntax**:\n
  6407. * ~~~
  6408. * RDOV Rd # pseudo mnemonic
  6409. * ~~~
  6410. *
  6411. * **Purpose**:\n
  6412. * This pseudo instruction is an alias to `CSRR Rd, ucode` instruction which maps to the real
  6413. * instruction of `CSRRS Rd, ucode, x0`.
  6414. *
  6415. *
  6416. * \return value stored in unsigned long type
  6417. */
  6418. __STATIC_FORCEINLINE unsigned long __RV_RDOV(void)
  6419. {
  6420. unsigned long result;
  6421. __ASM volatile("rdov %0" : "=r"(result));
  6422. return result;
  6423. }
  6424. /* ===== Inline Function End for 3.86. RDOV ===== */
  6425. /* ===== Inline Function Start for 3.87. RSTAS16 ===== */
  6426. /**
  6427. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6428. * \brief RSTAS16 (SIMD 16-bit Signed Halving Straight Addition & Subtraction)
  6429. * \details
  6430. * **Type**: SIMD
  6431. *
  6432. * **Syntax**:\n
  6433. * ~~~
  6434. * RSTAS16 Rd, Rs1, Rs2
  6435. * ~~~
  6436. *
  6437. * **Purpose**:\n
  6438. * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in
  6439. * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
  6440. * results are halved to avoid overflow or saturation.
  6441. *
  6442. * **Description**:\n
  6443. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in
  6444. * Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit
  6445. * signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in
  6446. * [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and
  6447. * then written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6448. *
  6449. * **Examples**:\n
  6450. * ~~~
  6451. * Please see `RADD16` and `RSUB16` instructions.
  6452. * ~~~
  6453. *
  6454. * **Operations**:\n
  6455. * ~~~
  6456. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) s>> 1;
  6457. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) s>> 1;
  6458. * for RV32, x=0
  6459. * for RV64, x=1...0
  6460. * ~~~
  6461. *
  6462. * \param [in] a unsigned long type of value stored in a
  6463. * \param [in] b unsigned long type of value stored in b
  6464. * \return value stored in unsigned long type
  6465. */
  6466. __STATIC_FORCEINLINE unsigned long __RV_RSTAS16(unsigned long a, unsigned long b)
  6467. {
  6468. unsigned long result;
  6469. __ASM volatile("rstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6470. return result;
  6471. }
  6472. /* ===== Inline Function End for 3.87. RSTAS16 ===== */
  6473. /* ===== Inline Function Start for 3.88. RSTSA16 ===== */
  6474. /**
  6475. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6476. * \brief RSTSA16 (SIMD 16-bit Signed Halving Straight Subtraction & Addition)
  6477. * \details
  6478. * **Type**: SIMD
  6479. *
  6480. * **Syntax**:\n
  6481. * ~~~
  6482. * RSTSA16 Rd, Rs1, Rs2
  6483. * ~~~
  6484. *
  6485. * **Purpose**:\n
  6486. * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in
  6487. * a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks. The
  6488. * results are halved to avoid overflow or saturation.
  6489. *
  6490. * **Description**:\n
  6491. * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks
  6492. * in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit
  6493. * signed element integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit signed integer element in
  6494. * [15:0] of 32-bit chunks in Rs2. The two results are first arithmetically right-shifted by 1 bit and then
  6495. * written to [31:16] of 32-bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  6496. *
  6497. * **Examples**:\n
  6498. * ~~~
  6499. * Please see `RADD16` and `RSUB16` instructions.
  6500. * ~~~
  6501. *
  6502. * **Operations**:\n
  6503. * ~~~
  6504. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) s>> 1;
  6505. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) s>> 1;
  6506. * for RV32, x=0
  6507. * for RV64, x=1...0
  6508. * ~~~
  6509. *
  6510. * \param [in] a unsigned long type of value stored in a
  6511. * \param [in] b unsigned long type of value stored in b
  6512. * \return value stored in unsigned long type
  6513. */
  6514. __STATIC_FORCEINLINE unsigned long __RV_RSTSA16(unsigned long a, unsigned long b)
  6515. {
  6516. unsigned long result;
  6517. __ASM volatile("rstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6518. return result;
  6519. }
  6520. /* ===== Inline Function End for 3.88. RSTSA16 ===== */
  6521. /* ===== Inline Function Start for 3.89. RSUB8 ===== */
  6522. /**
  6523. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  6524. * \brief RSUB8 (SIMD 8-bit Signed Halving Subtraction)
  6525. * \details
  6526. * **Type**: SIMD
  6527. *
  6528. * **Syntax**:\n
  6529. * ~~~
  6530. * RSUB8 Rd, Rs1, Rs2
  6531. * ~~~
  6532. *
  6533. * **Purpose**:\n
  6534. * Do 8-bit signed integer element subtractions simultaneously. The results are halved to
  6535. * avoid overflow or saturation.
  6536. *
  6537. * **Description**:\n
  6538. * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
  6539. * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
  6540. * written to Rd.
  6541. *
  6542. * **Examples**:\n
  6543. * ~~~
  6544. * * Rs1 = 0x7F, Rs2 = 0x80, Rd = 0x7F
  6545. * * Rs1 = 0x80, Rs2 = 0x7F, Rd = 0x80
  6546. * * Rs1 = 0x80, Rs2 = 0x40, Rd = 0xA0
  6547. * ~~~
  6548. *
  6549. * **Operations**:\n
  6550. * ~~~
  6551. * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) s>> 1;
  6552. * for RV32: x=3...0,
  6553. * for RV64: x=7...0
  6554. * ~~~
  6555. *
  6556. * \param [in] a unsigned long type of value stored in a
  6557. * \param [in] b unsigned long type of value stored in b
  6558. * \return value stored in unsigned long type
  6559. */
  6560. __STATIC_FORCEINLINE unsigned long __RV_RSUB8(unsigned long a, unsigned long b)
  6561. {
  6562. unsigned long result;
  6563. __ASM volatile("rsub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6564. return result;
  6565. }
  6566. /* ===== Inline Function End for 3.89. RSUB8 ===== */
  6567. /* ===== Inline Function Start for 3.90. RSUB16 ===== */
  6568. /**
  6569. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  6570. * \brief RSUB16 (SIMD 16-bit Signed Halving Subtraction)
  6571. * \details
  6572. * **Type**: SIMD
  6573. *
  6574. * **Syntax**:\n
  6575. * ~~~
  6576. * RSUB16 Rd, Rs1, Rs2
  6577. * ~~~
  6578. *
  6579. * **Purpose**:\n
  6580. * Do 16-bit signed integer element subtractions simultaneously. The results are halved to
  6581. * avoid overflow or saturation.
  6582. *
  6583. * **Description**:\n
  6584. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
  6585. * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
  6586. * written to Rd.
  6587. *
  6588. * **Examples**:\n
  6589. * ~~~
  6590. * * Rs1 = 0x7FFF, Rs2 = 0x8000, Rd = 0x7FFF
  6591. * * Rs1 = 0x8000, Rs2 = 0x7FFF, Rd = 0x8000
  6592. * * Rs1 = 0x8000, Rs2 = 0x4000, Rd = 0xA000
  6593. * ~~~
  6594. *
  6595. * **Operations**:\n
  6596. * ~~~
  6597. * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
  6598. * for RV32: x=1...0,
  6599. * for RV64: x=3...0
  6600. * ~~~
  6601. *
  6602. * \param [in] a unsigned long type of value stored in a
  6603. * \param [in] b unsigned long type of value stored in b
  6604. * \return value stored in unsigned long type
  6605. */
  6606. __STATIC_FORCEINLINE unsigned long __RV_RSUB16(unsigned long a, unsigned long b)
  6607. {
  6608. unsigned long result;
  6609. __ASM volatile("rsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6610. return result;
  6611. }
  6612. /* ===== Inline Function End for 3.90. RSUB16 ===== */
  6613. /* ===== Inline Function Start for 3.91. RSUB64 ===== */
  6614. /**
  6615. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  6616. * \brief RSUB64 (64-bit Signed Halving Subtraction)
  6617. * \details
  6618. * **Type**: DSP (64-bit Profile)
  6619. *
  6620. * **Syntax**:\n
  6621. * ~~~
  6622. * RSUB64 Rd, Rs1, Rs2
  6623. * ~~~
  6624. *
  6625. * **Purpose**:\n
  6626. * Perform a 64-bit signed integer subtraction. The result is halved to avoid overflow or
  6627. * saturation.
  6628. *
  6629. * **RV32 Description**:\n
  6630. * This instruction subtracts the 64-bit signed integer of an even/odd pair of
  6631. * registers specified by Rs2(4,1) from the 64-bit signed integer of an even/odd pair of registers
  6632. * specified by Rs1(4,1). The subtraction result is first arithmetically right-shifted by 1 bit and then
  6633. * written to an even/odd pair of registers specified by Rd(4,1).
  6634. * Rx(4,1), i.e., value d, determines the even/odd pair group of two registers. Specifically, the register
  6635. * pair includes register 2d and 2d+1.
  6636. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  6637. * of the pair contains the low 32-bit of the result.
  6638. *
  6639. * **RV64 Description**:\n
  6640. * This instruction subtracts the 64-bit signed integer in Rs2 from the 64-bit signed
  6641. * integer in Rs1. The 64-bit subtraction result is first arithmetically right-shifted by 1 bit and then
  6642. * written to Rd.
  6643. *
  6644. * **Operations**:\n
  6645. * ~~~
  6646. * RV32:
  6647. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  6648. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  6649. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  6650. * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) s>> 1;
  6651. * RV64:
  6652. * Rd = (Rs1 - Rs2) s>> 1;
  6653. * ~~~
  6654. *
  6655. * \param [in] a long long type of value stored in a
  6656. * \param [in] b long long type of value stored in b
  6657. * \return value stored in long long type
  6658. */
  6659. __STATIC_FORCEINLINE long long __RV_RSUB64(long long a, long long b)
  6660. {
  6661. long long result;
  6662. __ASM volatile("rsub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6663. return result;
  6664. }
  6665. /* ===== Inline Function End for 3.91. RSUB64 ===== */
  6666. /* ===== Inline Function Start for 3.92. RSUBW ===== */
  6667. /**
  6668. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  6669. * \brief RSUBW (32-bit Signed Halving Subtraction)
  6670. * \details
  6671. * **Type**: DSP
  6672. *
  6673. * **Syntax**:\n
  6674. * ~~~
  6675. * RSUBW Rd, Rs1, Rs2
  6676. * ~~~
  6677. *
  6678. * **Purpose**:\n
  6679. * Subtract 32-bit signed integers and the result is halved to avoid overflow or saturation.
  6680. *
  6681. * **Description**:\n
  6682. * This instruction subtracts the first 32-bit signed integer in Rs2 from the first 32-bit
  6683. * signed integer in Rs1. The result is first arithmetically right-shifted by 1 bit and then sign-extended
  6684. * and written to Rd.
  6685. *
  6686. * **Examples**:\n
  6687. * ~~~
  6688. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
  6689. * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
  6690. * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
  6691. * ~~~
  6692. *
  6693. * **Operations**:\n
  6694. * ~~~
  6695. * RV32:
  6696. * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
  6697. * RV64:
  6698. * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) s>> 1;
  6699. * Rd[63:0] = SE(resw[31:0]);
  6700. * ~~~
  6701. *
  6702. * \param [in] a int type of value stored in a
  6703. * \param [in] b int type of value stored in b
  6704. * \return value stored in long type
  6705. */
  6706. __STATIC_FORCEINLINE long __RV_RSUBW(int a, int b)
  6707. {
  6708. long result;
  6709. __ASM volatile("rsubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6710. return result;
  6711. }
  6712. /* ===== Inline Function End for 3.92. RSUBW ===== */
  6713. /* ===== Inline Function Start for 3.93. SCLIP8 ===== */
  6714. /**
  6715. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  6716. * \brief SCLIP8 (SIMD 8-bit Signed Clip Value)
  6717. * \details
  6718. * **Type**: SIMD
  6719. *
  6720. * **Syntax**:\n
  6721. * ~~~
  6722. * SCLIP8 Rd, Rs1, imm3u[2:0]
  6723. * ~~~
  6724. *
  6725. * **Purpose**:\n
  6726. * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
  6727. *
  6728. * **Description**:\n
  6729. * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed
  6730. * integer range between 2^imm3u-1 and -2^imm3u, and writes the limited results to Rd. For example, if
  6731. * imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed,
  6732. * set OV bit to 1.
  6733. *
  6734. * **Operations**:\n
  6735. * ~~~
  6736. * src = Rs1.B[x];
  6737. * if (src > (2^imm3u)-1) {
  6738. * src = (2^imm3u)-1;
  6739. * OV = 1;
  6740. * } else if (src < -2^imm3u) {
  6741. * src = -2^imm3u;
  6742. * OV = 1;
  6743. * }
  6744. * Rd.B[x] = src
  6745. * for RV32: x=3...0,
  6746. * for RV64: x=7...0
  6747. * ~~~
  6748. *
  6749. * \param [in] a unsigned long type of value stored in a
  6750. * \param [in] b unsigned int type of value stored in b
  6751. * \return value stored in unsigned long type
  6752. */
  6753. #define __RV_SCLIP8(a, b) \
  6754. ({ \
  6755. unsigned long result; \
  6756. unsigned long __a = (unsigned long)(a); \
  6757. __ASM volatile("sclip8 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
  6758. result; \
  6759. })
  6760. /* ===== Inline Function End for 3.93. SCLIP8 ===== */
  6761. /* ===== Inline Function Start for 3.94. SCLIP16 ===== */
  6762. /**
  6763. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  6764. * \brief SCLIP16 (SIMD 16-bit Signed Clip Value)
  6765. * \details
  6766. * **Type**: SIMD
  6767. *
  6768. * **Syntax**:\n
  6769. * ~~~
  6770. * SCLIP16 Rd, Rs1, imm4u[3:0]
  6771. * ~~~
  6772. *
  6773. * **Purpose**:\n
  6774. * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
  6775. *
  6776. * **Description**:\n
  6777. * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed
  6778. * integer range between 2^imm4u-1 and -2^imm4u, and writes the limited results to Rd. For example, if
  6779. * imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed,
  6780. * set OV bit to 1.
  6781. *
  6782. * **Operations**:\n
  6783. * ~~~
  6784. * src = Rs1.H[x];
  6785. * if (src > (2^imm4u)-1) {
  6786. * src = (2^imm4u)-1;
  6787. * OV = 1;
  6788. * } else if (src < -2^imm4u) {
  6789. * src = -2^imm4u;
  6790. * OV = 1;
  6791. * }
  6792. * Rd.H[x] = src
  6793. * for RV32: x=1...0,
  6794. * for RV64: x=3...0
  6795. * ~~~
  6796. *
  6797. * \param [in] a unsigned long type of value stored in a
  6798. * \param [in] b unsigned int type of value stored in b
  6799. * \return value stored in unsigned long type
  6800. */
  6801. #define __RV_SCLIP16(a, b) \
  6802. ({ \
  6803. unsigned long result; \
  6804. unsigned long __a = (unsigned long)(a); \
  6805. __ASM volatile("sclip16 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
  6806. result; \
  6807. })
  6808. /* ===== Inline Function End for 3.94. SCLIP16 ===== */
  6809. /* ===== Inline Function Start for 3.95. SCLIP32 ===== */
  6810. /**
  6811. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  6812. * \brief SCLIP32 (SIMD 32-bit Signed Clip Value)
  6813. * \details
  6814. * **Type**: DSP
  6815. *
  6816. * **Syntax**:\n
  6817. * ~~~
  6818. * SCLIP32 Rd, Rs1, imm5u[4:0]
  6819. * ~~~
  6820. *
  6821. * **Purpose**:\n
  6822. * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
  6823. *
  6824. * **Description**:\n
  6825. * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed
  6826. * integer range between 2^imm5u-1 and -2^imm5u, and writes the limited results to Rd. For example, if
  6827. * imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed,
  6828. * set OV bit to 1.
  6829. *
  6830. * **Operations**:\n
  6831. * ~~~
  6832. * src = Rs1.W[x];
  6833. * if (src > (2^imm5u)-1) {
  6834. * src = (2^imm5u)-1;
  6835. * OV = 1;
  6836. * } else if (src < -2^imm5u) {
  6837. * src = -2^imm5u;
  6838. * OV = 1;
  6839. * }
  6840. * Rd.W[x] = src
  6841. * for RV32: x=0,
  6842. * for RV64: x=1...0
  6843. * ~~~
  6844. *
  6845. * \param [in] a long type of value stored in a
  6846. * \param [in] b unsigned int type of value stored in b
  6847. * \return value stored in long type
  6848. */
  6849. #define __RV_SCLIP32(a, b) \
  6850. ({ \
  6851. long result; \
  6852. long __a = (long)(a); \
  6853. __ASM volatile("sclip32 %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
  6854. result; \
  6855. })
  6856. /* ===== Inline Function End for 3.95. SCLIP32 ===== */
  6857. /* ===== Inline Function Start for 3.96. SCMPLE8 ===== */
  6858. /**
  6859. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  6860. * \brief SCMPLE8 (SIMD 8-bit Signed Compare Less Than & Equal)
  6861. * \details
  6862. * **Type**: SIMD
  6863. *
  6864. * **Syntax**:\n
  6865. * ~~~
  6866. * SCMPLE8 Rd, Rs1, Rs2
  6867. * ~~~
  6868. *
  6869. * **Purpose**:\n
  6870. * Do 8-bit signed integer elements less than & equal comparisons simultaneously.
  6871. *
  6872. * **Description**:\n
  6873. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  6874. * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
  6875. * true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
  6876. * Rd.
  6877. *
  6878. * **Operations**:\n
  6879. * ~~~
  6880. * Rd.B[x] = (Rs1.B[x] {le} Rs2.B[x])? 0xff : 0x0;
  6881. * for RV32: x=3...0,
  6882. * for RV64: x=7...0
  6883. * ~~~
  6884. *
  6885. * \param [in] a unsigned long type of value stored in a
  6886. * \param [in] b unsigned long type of value stored in b
  6887. * \return value stored in unsigned long type
  6888. */
  6889. __STATIC_FORCEINLINE unsigned long __RV_SCMPLE8(unsigned long a, unsigned long b)
  6890. {
  6891. unsigned long result;
  6892. __ASM volatile("scmple8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6893. return result;
  6894. }
  6895. /* ===== Inline Function End for 3.96. SCMPLE8 ===== */
  6896. /* ===== Inline Function Start for 3.97. SCMPLE16 ===== */
  6897. /**
  6898. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  6899. * \brief SCMPLE16 (SIMD 16-bit Signed Compare Less Than & Equal)
  6900. * \details
  6901. * **Type**: SIMD
  6902. *
  6903. * **Syntax**:\n
  6904. * ~~~
  6905. * SCMPLE16 Rd, Rs1, Rs2
  6906. * ~~~
  6907. *
  6908. * **Purpose**:\n
  6909. * Do 16-bit signed integer elements less than & equal comparisons simultaneously.
  6910. *
  6911. * **Description**:\n
  6912. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  6913. * signed integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it is
  6914. * true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written
  6915. * to Rd.
  6916. *
  6917. * **Operations**:\n
  6918. * ~~~
  6919. * Rd.H[x] = (Rs1.H[x] {le} Rs2.H[x])? 0xffff : 0x0;
  6920. * for RV32: x=1...0,
  6921. * for RV64: x=3...0
  6922. * ~~~
  6923. *
  6924. * \param [in] a unsigned long type of value stored in a
  6925. * \param [in] b unsigned long type of value stored in b
  6926. * \return value stored in unsigned long type
  6927. */
  6928. __STATIC_FORCEINLINE unsigned long __RV_SCMPLE16(unsigned long a, unsigned long b)
  6929. {
  6930. unsigned long result;
  6931. __ASM volatile("scmple16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6932. return result;
  6933. }
  6934. /* ===== Inline Function End for 3.97. SCMPLE16 ===== */
  6935. /* ===== Inline Function Start for 3.98. SCMPLT8 ===== */
  6936. /**
  6937. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  6938. * \brief SCMPLT8 (SIMD 8-bit Signed Compare Less Than)
  6939. * \details
  6940. * **Type**: SIMD
  6941. *
  6942. * **Syntax**:\n
  6943. * ~~~
  6944. * SCMPLT8 Rd, Rs1, Rs2
  6945. * ~~~
  6946. *
  6947. * **Purpose**:\n
  6948. * Do 8-bit signed integer elements less than comparisons simultaneously.
  6949. *
  6950. * **Description**:\n
  6951. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  6952. * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  6953. * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  6954. *
  6955. * **Operations**:\n
  6956. * ~~~
  6957. * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? 0xff : 0x0;
  6958. * for RV32: x=3...0,
  6959. * for RV64: x=7...0
  6960. * ~~~
  6961. *
  6962. * \param [in] a unsigned long type of value stored in a
  6963. * \param [in] b unsigned long type of value stored in b
  6964. * \return value stored in unsigned long type
  6965. */
  6966. __STATIC_FORCEINLINE unsigned long __RV_SCMPLT8(unsigned long a, unsigned long b)
  6967. {
  6968. unsigned long result;
  6969. __ASM volatile("scmplt8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  6970. return result;
  6971. }
  6972. /* ===== Inline Function End for 3.98. SCMPLT8 ===== */
  6973. /* ===== Inline Function Start for 3.99. SCMPLT16 ===== */
  6974. /**
  6975. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  6976. * \brief SCMPLT16 (SIMD 16-bit Signed Compare Less Than)
  6977. * \details
  6978. * **Type**: SIMD
  6979. *
  6980. * **Syntax**:\n
  6981. * ~~~
  6982. * SCMPLT16 Rd, Rs1, Rs2
  6983. * ~~~
  6984. *
  6985. * **Purpose**:\n
  6986. * Do 16-bit signed integer elements less than comparisons simultaneously.
  6987. *
  6988. * **Description**:\n
  6989. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  6990. * signed integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  6991. * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  6992. *
  6993. * **Operations**:\n
  6994. * ~~~
  6995. * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? 0xffff : 0x0;
  6996. * for RV32: x=1...0,
  6997. * for RV64: x=3...0
  6998. * ~~~
  6999. *
  7000. * \param [in] a unsigned long type of value stored in a
  7001. * \param [in] b unsigned long type of value stored in b
  7002. * \return value stored in unsigned long type
  7003. */
  7004. __STATIC_FORCEINLINE unsigned long __RV_SCMPLT16(unsigned long a, unsigned long b)
  7005. {
  7006. unsigned long result;
  7007. __ASM volatile("scmplt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7008. return result;
  7009. }
  7010. /* ===== Inline Function End for 3.99. SCMPLT16 ===== */
  7011. /* ===== Inline Function Start for 3.100. SLL8 ===== */
  7012. /**
  7013. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  7014. * \brief SLL8 (SIMD 8-bit Shift Left Logical)
  7015. * \details
  7016. * **Type**: SIMD
  7017. *
  7018. * **Syntax**:\n
  7019. * ~~~
  7020. * SLL8 Rd, Rs1, Rs2
  7021. * ~~~
  7022. *
  7023. * **Purpose**:\n
  7024. * Do 8-bit elements logical left shift operations simultaneously. The shift amount is a
  7025. * variable from a GPR.
  7026. *
  7027. * **Description**:\n
  7028. * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  7029. * The vacated low-order bits are filled with zero and the shift amount is specified by the low-order 3 bits of
  7030. * the value in the Rs2 register.
  7031. *
  7032. * **Operations**:\n
  7033. * ~~~
  7034. * sa = Rs2[2:0];
  7035. * Rd.B[x] = Rs1.B[x] << sa;
  7036. * for RV32: x=3...0,
  7037. * for RV64: x=7...0
  7038. * ~~~
  7039. *
  7040. * \param [in] a unsigned long type of value stored in a
  7041. * \param [in] b unsigned int type of value stored in b
  7042. * \return value stored in unsigned long type
  7043. */
  7044. __STATIC_FORCEINLINE unsigned long __RV_SLL8(unsigned long a, unsigned int b)
  7045. {
  7046. unsigned long result;
  7047. __ASM volatile("sll8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7048. return result;
  7049. }
  7050. /* ===== Inline Function End for 3.100. SLL8 ===== */
  7051. /* ===== Inline Function Start for 3.101. SLLI8 ===== */
  7052. /**
  7053. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  7054. * \brief SLLI8 (SIMD 8-bit Shift Left Logical Immediate)
  7055. * \details
  7056. * **Type**: SIMD
  7057. *
  7058. * **Syntax**:\n
  7059. * ~~~
  7060. * SLLI8 Rd, Rs1, imm3u
  7061. * ~~~
  7062. *
  7063. * **Purpose**:\n
  7064. * Do 8-bit elements logical left shift operations simultaneously. The shift amount is an
  7065. * immediate value.
  7066. *
  7067. * **Description**:\n
  7068. * The 8-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  7069. * The vacated low-order bits are filled with zero and the shift amount is specified by the imm3u constant.
  7070. *
  7071. * **Operations**:\n
  7072. * ~~~
  7073. * sa = imm3u[2:0];
  7074. * Rd.B[x] = Rs1.B[x] << sa;
  7075. * for RV32: x=3...0,
  7076. * for RV64: x=7...0
  7077. * ~~~
  7078. *
  7079. * \param [in] a unsigned long type of value stored in a
  7080. * \param [in] b unsigned int type of value stored in b
  7081. * \return value stored in unsigned long type
  7082. */
  /*
   * __RV_SLLI8(a, b): emit `slli8`, which shifts every 8-bit element of `a`
   * left by the immediate `b`, and yield the result as an unsigned long.
   *
   *   a - source value; converted to unsigned long and evaluated exactly once
   *   b - shift amount; must be a constant accepted by the "K" asm constraint
   *
   * `a` is evaluated before the output temporary exists, and both temporaries
   * carry instruction-specific names, so an argument expression that mentions
   * a caller variable named `result` reads the caller's variable rather than
   * an uninitialised macro-local one.
   */
  #define __RV_SLLI8(a, b) \
  ({ \
      unsigned long __slli8_src = (unsigned long)(a); \
      unsigned long __slli8_dst; \
      __ASM volatile("slli8 %0, %1, %2" \
                     : "=r"(__slli8_dst) \
                     : "r"(__slli8_src), "K"(b)); \
      __slli8_dst; \
  })
  7090. /* ===== Inline Function End for 3.101. SLLI8 ===== */
  7091. /* ===== Inline Function Start for 3.102. SLL16 ===== */
  7092. /**
  7093. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  7094. * \brief SLL16 (SIMD 16-bit Shift Left Logical)
  7095. * \details
  7096. * **Type**: SIMD
  7097. *
  7098. * **Syntax**:\n
  7099. * ~~~
  7100. * SLL16 Rd, Rs1, Rs2
  7101. * ~~~
  7102. *
  7103. * **Purpose**:\n
  7104. * Do 16-bit elements logical left shift operations simultaneously. The shift amount is a
  7105. * variable from a GPR.
  7106. *
  7107. * **Description**:\n
  7108. * The 16-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  7109. * The vacated low-order bits are filled with zero and the shift amount is specified by the low-order 4 bits of
  7110. * the value in the Rs2 register.
  7111. *
  7112. * **Operations**:\n
  7113. * ~~~
  7114. * sa = Rs2[3:0];
  7115. * Rd.H[x] = Rs1.H[x] << sa;
  7116. * for RV32: x=1...0,
  7117. * for RV64: x=3...0
  7118. * ~~~
  7119. *
  7120. * \param [in] a unsigned long type of value stored in a
  7121. * \param [in] b unsigned int type of value stored in b
  7122. * \return value stored in unsigned long type
  7123. */
  7124. __STATIC_FORCEINLINE unsigned long __RV_SLL16(unsigned long a, unsigned int b)
  7125. {
  7126. unsigned long result;
  7127. __ASM volatile("sll16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7128. return result;
  7129. }
  7130. /* ===== Inline Function End for 3.102. SLL16 ===== */
  7131. /* ===== Inline Function Start for 3.103. SLLI16 ===== */
  7132. /**
  7133. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  7134. * \brief SLLI16 (SIMD 16-bit Shift Left Logical Immediate)
  7135. * \details
  7136. * **Type**: SIMD
  7137. *
  7138. * **Syntax**:\n
  7139. * ~~~
  7140. * SLLI16 Rd, Rs1, imm4[3:0]
  7141. * ~~~
  7142. *
  7143. * **Purpose**:\n
  7144. * Do 16-bit element logical left shift operations simultaneously. The shift amount is an
  7145. * immediate value.
  7146. *
  7147. * **Description**:\n
  7148. * The 16-bit elements in Rs1 are left-shifted logically. The vacated low-order bits are filled with
  7149. * zero and the shift amount is specified by the imm4[3:0] constant. And the results are written to Rd.
  7150. *
  7151. * **Operations**:\n
  7152. * ~~~
  7153. * sa = imm4[3:0];
  7154. * Rd.H[x] = Rs1.H[x] << sa;
  7155. * for RV32: x=1...0,
  7156. * for RV64: x=3...0
  7157. * ~~~
  7158. *
  7159. * \param [in] a unsigned long type of value stored in a
  7160. * \param [in] b unsigned int type of value stored in b
  7161. * \return value stored in unsigned long type
  7162. */
  /*
   * __RV_SLLI16(a, b): emit `slli16`, which shifts every 16-bit element of `a`
   * left by the immediate `b`, and yield the result as an unsigned long.
   *
   *   a - source value; converted to unsigned long and evaluated exactly once
   *   b - shift amount; must be a constant accepted by the "K" asm constraint
   *
   * `a` is evaluated before the output temporary exists, and both temporaries
   * carry instruction-specific names, so an argument expression that mentions
   * a caller variable named `result` reads the caller's variable rather than
   * an uninitialised macro-local one.
   */
  #define __RV_SLLI16(a, b) \
  ({ \
      unsigned long __slli16_src = (unsigned long)(a); \
      unsigned long __slli16_dst; \
      __ASM volatile("slli16 %0, %1, %2" \
                     : "=r"(__slli16_dst) \
                     : "r"(__slli16_src), "K"(b)); \
      __slli16_dst; \
  })
  7170. /* ===== Inline Function End for 3.103. SLLI16 ===== */
  7171. /* ===== Inline Function Start for 3.104. SMAL ===== */
  7172. /**
  7173. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7174. * \brief SMAL (Signed Multiply Halfs & Add 64-bit)
  7175. * \details
  7176. * **Type**: Partial-SIMD
  7177. *
  7178. * **Syntax**:\n
  7179. * ~~~
  7180. * SMAL Rd, Rs1, Rs2
  7181. * ~~~
  7182. *
  7183. * **Purpose**:\n
  7184. * Multiply the signed bottom 16-bit content of the 32-bit elements of a register with the top
  7185. * 16-bit content of the same 32-bit elements of the same register, and add the results with a 64-bit
  7186. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7187. * to another even/odd pair of registers (RV32) or a register (RV64).
  7188. *
  7189. * **RV32 Description**:\n
  7190. * This instruction multiplies the bottom 16-bit content of the lower 32-bit of Rs2 with the top 16-bit
  7191. * content of the lower 32-bit of Rs2 and adds the result with the 64-bit value of an even/odd pair of
  7192. * registers specified by Rs1(4,1). The 64-bit addition result is written back to an even/odd pair of
  7193. * registers specified by Rd(4,1). The 16-bit values of Rs2, and the 64-bit value of the Rs1(4,1) register-
  7194. * pair are treated as signed integers.
  7195. * Rx(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7196. * includes register 2d and 2d+1.
  7197. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7198. * register of the pair contains the low 32-bit of the operand.
  7199. *
  7200. * **RV64 Description**:\n
  7201. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs2 with the top 16-bit
  7202. * content of the same 32-bit elements of Rs2 and adds the results with the 64-bit value of Rs1. The 64-
  7203. * bit addition result is written back to Rd. The 16-bit values of Rs2, and the 64-bit value of Rs1 are
  7204. * treated as signed integers.
  7205. *
  7206. * **Operations**:\n
  7207. * ~~~
  7208. * RV32:
  7209. * Mres[31:0] = Rs2.H[1] * Rs2.H[0];
  7210. * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
  7211. * Idx2 = CONCAT(Rd(4,1),1'b0); Idx3 = CONCAT(Rd(4,1),1'b1);
  7212. * R[Idx3].R[Idx2] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7213. * RV64:
  7214. * Mres[0][31:0] = Rs2.W[0].H[1] * Rs2.W[0].H[0];
  7215. * Mres[1][31:0] = Rs2.W[1].H[1] * Rs2.W[1].H[0];
  7216. * Rd = Rs1 + SE64(Mres[1][31:0]) + SE64(Mres[0][31:0]);
  7217. * ~~~
  7218. *
  7219. * \param [in] a long long type of value stored in a
  7220. * \param [in] b unsigned long type of value stored in b
  7221. * \return value stored in long long type
  7222. */
  7223. __STATIC_FORCEINLINE long long __RV_SMAL(long long a, unsigned long b)
  7224. {
  7225. long long result;
  7226. __ASM volatile("smal %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  7227. return result;
  7228. }
  7229. /* ===== Inline Function End for 3.104. SMAL ===== */
  7230. /* ===== Inline Function Start for 3.105.1. SMALBB ===== */
  7231. /**
  7232. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7233. * \brief SMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
  7234. * \details
  7235. * **Type**: DSP (64-bit Profile)
  7236. *
  7237. * **Syntax**:\n
  7238. * ~~~
  7239. * SMALBB Rd, Rs1, Rs2
  7240. * SMALBT Rd, Rs1, Rs2
  7241. * SMALTT Rd, Rs1, Rs2
  7242. * ~~~
  7243. *
  7244. * **Purpose**:\n
  7245. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
  7246. * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
  7247. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7248. * to the register-pair (RV32) or the register (RV64).
  7249. * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
  7250. * * SMALBT: rt pair + bottom*top (all 32-bit elements)
  7251. * * SMALTT: rt pair + top*top (all 32-bit elements)
  7252. *
  7253. * **RV32 Description**:\n
  7254. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7255. * content of Rs2.
  7256. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7257. * content of Rs2.
  7258. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7259. * of Rs2.
  7260. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
  7261. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7262. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7263. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7264. * includes register 2d and 2d+1.
  7265. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7266. * register of the pair contains the low 32-bit of the operand.
  7267. *
  7268. * **RV64 Description**:\n
  7269. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7270. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  7271. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7272. * with the top 16-bit content of the 32-bit elements of Rs2.
  7273. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7274. * the top 16-bit content of the 32-bit elements of Rs2.
  7275. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
  7276. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7277. * integers.
  7278. *
  7279. * **Operations**:\n
  7280. * ~~~
  7281. * RV32:
  7282. * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
  7283. * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
  7284. * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
  7285. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7286. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7287. * RV64:
  7288. * // SMALBB
  7289. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  7290. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  7291. * // SMALBT
  7292. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  7293. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  7294. * // SMALTT
  7295. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  7296. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  7297. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7298. * ~~~
  7299. *
  7300. * \param [in] t long long type of value stored in t
  7301. * \param [in] a unsigned long type of value stored in a
  7302. * \param [in] b unsigned long type of value stored in b
  7303. * \return value stored in long long type
  7304. */
  7305. __STATIC_FORCEINLINE long long __RV_SMALBB(long long t, unsigned long a, unsigned long b)
  7306. {
  7307. __ASM volatile("smalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7308. return t;
  7309. }
  7310. /* ===== Inline Function End for 3.105.1. SMALBB ===== */
  7311. /* ===== Inline Function Start for 3.105.2. SMALBT ===== */
  7312. /**
  7313. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7314. * \brief SMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
  7315. * \details
  7316. * **Type**: DSP (64-bit Profile)
  7317. *
  7318. * **Syntax**:\n
  7319. * ~~~
  7320. * SMALBB Rd, Rs1, Rs2
  7321. * SMALBT Rd, Rs1, Rs2
  7322. * SMALTT Rd, Rs1, Rs2
  7323. * ~~~
  7324. *
  7325. * **Purpose**:\n
  7326. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
  7327. * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
  7328. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7329. * to the register-pair (RV32) or the register (RV64).
  7330. * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
  7331. * * SMALBT: rt pair + bottom*top (all 32-bit elements)
  7332. * * SMALTT: rt pair + top*top (all 32-bit elements)
  7333. *
  7334. * **RV32 Description**:\n
  7335. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7336. * content of Rs2.
  7337. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7338. * content of Rs2.
  7339. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7340. * of Rs2.
  7341. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
  7342. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7343. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7344. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7345. * includes register 2d and 2d+1.
  7346. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7347. * register of the pair contains the low 32-bit of the operand.
  7348. *
  7349. * **RV64 Description**:\n
  7350. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7351. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  7352. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7353. * with the top 16-bit content of the 32-bit elements of Rs2.
  7354. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7355. * the top 16-bit content of the 32-bit elements of Rs2.
  7356. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
  7357. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7358. * integers.
  7359. *
  7360. * **Operations**:\n
  7361. * ~~~
  7362. * RV32:
  7363. * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
  7364. * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
  7365. * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
  7366. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7367. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7368. * RV64:
  7369. * // SMALBB
  7370. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  7371. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  7372. * // SMALBT
  7373. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  7374. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  7375. * // SMALTT
  7376. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  7377. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  7378. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7379. * ~~~
  7380. *
  7381. * \param [in] t long long type of value stored in t
  7382. * \param [in] a unsigned long type of value stored in a
  7383. * \param [in] b unsigned long type of value stored in b
  7384. * \return value stored in long long type
  7385. */
  7386. __STATIC_FORCEINLINE long long __RV_SMALBT(long long t, unsigned long a, unsigned long b)
  7387. {
  7388. __ASM volatile("smalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7389. return t;
  7390. }
  7391. /* ===== Inline Function End for 3.105.2. SMALBT ===== */
  7392. /* ===== Inline Function Start for 3.105.3. SMALTT ===== */
  7393. /**
  7394. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7395. * \brief SMALTT (Signed Multiply Top Halfs & Add 64-bit)
  7396. * \details
  7397. * **Type**: DSP (64-bit Profile)
  7398. *
  7399. * **Syntax**:\n
  7400. * ~~~
  7401. * SMALBB Rd, Rs1, Rs2
  7402. * SMALBT Rd, Rs1, Rs2
  7403. * SMALTT Rd, Rs1, Rs2
  7404. * ~~~
  7405. *
  7406. * **Purpose**:\n
  7407. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit
  7408. * content of the corresponding 32-bit elements of another register and add the results with a 64-bit
  7409. * value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is written back
  7410. * to the register-pair (RV32) or the register (RV64).
  7411. * * SMALBB: rt pair + bottom*bottom (all 32-bit elements)
  7412. * * SMALBT: rt pair + bottom*top (all 32-bit elements)
  7413. * * SMALTT: rt pair + top*top (all 32-bit elements)
  7414. *
  7415. * **RV32 Description**:\n
  7416. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7417. * content of Rs2.
  7418. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7419. * content of Rs2.
  7420. * For the `SMALTT` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7421. * of Rs2.
  7422. * The multiplication result is added with the 64-bit value of an even/odd pair of registers specified by
  7423. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7424. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7425. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7426. * includes register 2d and 2d+1.
  7427. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7428. * register of the pair contains the low 32-bit of the operand.
  7429. *
  7430. * **RV64 Description**:\n
  7431. * For the `SMALBB` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7432. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  7433. * For the `SMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7434. * with the top 16-bit content of the 32-bit elements of Rs2.
  7435. * For the `SMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7436. * the top 16-bit content of the 32-bit elements of Rs2.
  7437. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written
  7438. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7439. * integers.
  7440. *
  7441. * **Operations**:\n
  7442. * ~~~
  7443. * RV32:
  7444. * Mres[31:0] = Rs1.H[0] * Rs2.H[0]; // SMALBB
  7445. * Mres[31:0] = Rs1.H[0] * Rs2.H[1]; // SMALBT
  7446. * Mres[31:0] = Rs1.H[1] * Rs2.H[1]; // SMALTT
  7447. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7448. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7449. * RV64:
  7450. * // SMALBB
  7451. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  7452. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  7453. * // SMALBT
  7454. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  7455. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  7456. * // SMALTT
  7457. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  7458. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  7459. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7460. * ~~~
  7461. *
  7462. * \param [in] t long long type of value stored in t
  7463. * \param [in] a unsigned long type of value stored in a
  7464. * \param [in] b unsigned long type of value stored in b
  7465. * \return value stored in long long type
  7466. */
  7467. __STATIC_FORCEINLINE long long __RV_SMALTT(long long t, unsigned long a, unsigned long b)
  7468. {
  7469. __ASM volatile("smaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7470. return t;
  7471. }
  7472. /* ===== Inline Function End for 3.105.3. SMALTT ===== */
  7473. /* ===== Inline Function Start for 3.106.1. SMALDA ===== */
  7474. /**
  7475. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7476. * \brief SMALDA (Signed Multiply Two Halfs and Two Adds 64-bit)
  7477. * \details
  7478. * **Type**: DSP (64-bit Profile)
  7479. *
  7480. * **Syntax**:\n
  7481. * ~~~
  7482. * SMALDA Rd, Rs1, Rs2
  7483. * SMALXDA Rd, Rs1, Rs2
  7484. * ~~~
  7485. *
  7486. * **Purpose**:\n
  7487. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7488. * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
  7489. * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
  7490. * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
  7491. *
  7492. * **RV32 Description**:\n
  7493. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7494. * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
  7495. * the top 16-bit content of Rs2 with unlimited precision.
  7496. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  7497. * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
  7498. * with the top 16-bit content of Rs2 with unlimited precision.
  7499. * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
  7500. * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
  7501. * bit value of the register-pair are treated as signed integers.
  7502. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7503. * includes register 2d and 2d+1.
  7504. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7505. * register of the pair contains the low 32-bit of the operand.
  7506. *
  7507. * **RV64 Description**:\n
  7508. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7509. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  7510. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
  7511. * bit elements of Rs2 with unlimited precision.
  7512. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
  7513. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  7514. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  7515. * 32-bit elements of Rs2 with unlimited precision.
  7516. * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  7517. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
  7518. *
  7519. * **Operations**:\n
  7520. * ~~~
  7521. * RV32:
  7522. * // SMALDA
  7523. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  7524. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  7525. * // SMALXDA
  7526. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  7527. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  7528. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7529. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
  7530. * RV64:
  7531. * // SMALDA
  7532. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  7533. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  7534. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7535. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7536. * // SMALXDA
  7537. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  7538. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  7539. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7540. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  7541. * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
  7542. * SE64(Mres1[1][31:0]);
  7543. * ~~~
  7544. *
  7545. * \param [in] t long long type of value stored in t
  7546. * \param [in] a unsigned long type of value stored in a
  7547. * \param [in] b unsigned long type of value stored in b
  7548. * \return value stored in long long type
  7549. */
  7550. __STATIC_FORCEINLINE long long __RV_SMALDA(long long t, unsigned long a, unsigned long b)
  7551. {
  7552. __ASM volatile("smalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  7553. return t;
  7554. }
  7555. /* ===== Inline Function End for 3.106.1. SMALDA ===== */
  7556. /* ===== Inline Function Start for 3.106.2. SMALXDA ===== */
  7557. /**
  7558. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7559. * \brief SMALXDA (Signed Crossed Multiply Two Halfs and Two Adds 64-bit)
  7560. * \details
  7561. * **Type**: DSP (64-bit Profile)
  7562. *
  7563. * **Syntax**:\n
  7564. * ~~~
  7565. * SMALDA Rd, Rs1, Rs2
  7566. * SMALXDA Rd, Rs1, Rs2
  7567. * ~~~
  7568. *
  7569. * **Purpose**:\n
  7570. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7571. * adds the two 32-bit results and the 64-bit value of an even/odd pair of registers together.
  7572. * * SMALDA: rt pair+ top*top + bottom*bottom (all 32-bit elements)
  7573. * * SMALXDA: rt pair+ top*bottom + bottom*top (all 32-bit elements)
  7574. *
  7575. * **RV32 Description**:\n
  7576. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7577. * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
  7578. * the top 16-bit content of Rs2 with unlimited precision.
  7579. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  7580. * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
  7581. * with the top 16-bit content of Rs2 with unlimited precision.
  7582. * The result is added to the 64-bit value of an even/odd pair of registers specified by Rd(4,1). The 64-
  7583. * bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 64-
  7584. * bit value of the register-pair are treated as signed integers.
  7585. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7586. * includes register 2d and 2d+1.
  7587. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7588. * register of the pair contains the low 32-bit of the operand.
  7589. *
  7590. * **RV64 Description**:\n
  7591. * For the `SMALDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7592. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  7593. * multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-
  7594. * bit elements of Rs2 with unlimited precision.
  7595. * For the `SMALXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1
  7596. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of
  7597. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  7598. * 32-bit elements of Rs2 with unlimited precision.
  7599. * The results are added to the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  7600. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
  7601. *
  7602. * **Operations**:\n
  7603. * ~~~
  7604. * RV32:
  7605. * // SMALDA
  7606. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  7607. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  7608. * // SMALXDA
  7609. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  7610. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  7611. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7612. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres0[31:0]) + SE64(Mres1[31:0]);
  7613. * RV64:
  7614. * // SMALDA
  7615. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  7616. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  7617. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7618. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7619. * // SMALXDA
  7620. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  7621. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  7622. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7623. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  7624. * Rd = Rd + SE64(Mres0[0][31:0]) + SE64(Mres1[0][31:0]) + SE64(Mres0[1][31:0]) +
  7625. * SE64(Mres1[1][31:0]);
  7626. * ~~~
  7627. *
  7628. * \param [in] t long long type of value stored in t
  7629. * \param [in] a unsigned long type of value stored in a
  7630. * \param [in] b unsigned long type of value stored in b
  7631. * \return value stored in long long type
  7632. */
  7633. __STATIC_FORCEINLINE long long __RV_SMALXDA(long long t, unsigned long a, unsigned long b)
  7634. {
  7635. __ASM volatile("smalxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is read as the 64-bit accumulator (Rd) and overwritten with the sum; a is Rs1, b is Rs2 */
  7636. return t; /* accumulator plus the crossed products (top*bottom + bottom*top) of each 32-bit element */
  7637. }
  7638. /* ===== Inline Function End for 3.106.2. SMALXDA ===== */
  7639. /* ===== Inline Function Start for 3.107.1. SMALDS ===== */
  7640. /**
  7641. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7642. * \brief SMALDS (Signed Multiply Two Halfs & Subtract & Add 64-bit)
  7643. * \details
  7644. * **Type**: DSP (64-bit Profile)
  7645. *
  7646. * **Syntax**:\n
  7647. * ~~~
  7648. * SMALDS Rd, Rs1, Rs2
  7649. * SMALDRS Rd, Rs1, Rs2
  7650. * SMALXDS Rd, Rs1, Rs2
  7651. * ~~~
  7652. *
  7653. * **Purpose**:\n
  7654. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7655. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  7656. * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
  7657. * written back to the register-pair.
  7658. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
  7659. * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
  7660. * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
  7661. *
  7662. * **RV32 Description**:\n
  7663. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7664. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7665. * Rs1 with the top 16-bit content of Rs2.
  7666. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7667. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  7668. * with the bottom 16-bit content of Rs2.
  7669. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7670. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7671. * Rs1 with the bottom 16-bit content of Rs2.
  7672. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
  7673. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7674. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7675. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7676. * includes register 2d and 2d+1.
  7677. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7678. * register of the pair contains the low 32-bit of the operand.
  7679. *
  7680. * **RV64 Description**:\n
  7681. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7682. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7683. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
  7684. * of the 32-bit elements of Rs2.
  7685. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7686. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  7687. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  7688. * the 32-bit elements of Rs2.
  7689. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7690. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7691. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  7692. * content of the 32-bit elements of Rs2.
  7693. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
  7694. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7695. * integers.
  7696. *
  7697. * **Operations**:\n
  7698. * ~~~
  7699. * * RV32:
  7700. * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
  7701. * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
  7702. * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
  7703. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7704. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7705. * * RV64:
  7706. * // SMALDS
  7707. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  7708. * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7709. * // SMALDRS
  7710. * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  7711. * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7712. * // SMALXDS
  7713. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  7714. * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7715. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7716. * ~~~
  7717. *
  7718. * \param [in] t long long type of value stored in t
  7719. * \param [in] a unsigned long type of value stored in a
  7720. * \param [in] b unsigned long type of value stored in b
  7721. * \return value stored in long long type
  7722. */
  7723. __STATIC_FORCEINLINE long long __RV_SMALDS(long long t, unsigned long a, unsigned long b)
  7724. {
  7725. __ASM volatile("smalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is read as the 64-bit accumulator (Rd) and overwritten with the result; a is Rs1, b is Rs2 */
  7726. return t; /* accumulator plus (top*top - bottom*bottom) of each 32-bit element */
  7727. }
  7728. /* ===== Inline Function End for 3.107.1. SMALDS ===== */
  7729. /* ===== Inline Function Start for 3.107.2. SMALDRS ===== */
  7730. /**
  7731. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7732. * \brief SMALDRS (Signed Multiply Two Halfs & Reverse Subtract & Add 64-bit)
  7733. * \details
  7734. * **Type**: DSP (64-bit Profile)
  7735. *
  7736. * **Syntax**:\n
  7737. * ~~~
  7738. * SMALDS Rd, Rs1, Rs2
  7739. * SMALDRS Rd, Rs1, Rs2
  7740. * SMALXDS Rd, Rs1, Rs2
  7741. * ~~~
  7742. *
  7743. * **Purpose**:\n
  7744. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7745. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  7746. * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
  7747. * written back to the register-pair.
  7748. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
  7749. * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
  7750. * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
  7751. *
  7752. * **RV32 Description**:\n
  7753. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7754. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7755. * Rs1 with the top 16-bit content of Rs2.
  7756. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7757. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  7758. * with the bottom 16-bit content of Rs2.
  7759. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7760. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7761. * Rs1 with the bottom 16-bit content of Rs2.
  7762. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
  7763. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7764. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7765. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7766. * includes register 2d and 2d+1.
  7767. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7768. * register of the pair contains the low 32-bit of the operand.
  7769. *
  7770. * **RV64 Description**:\n
  7771. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7772. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7773. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
  7774. * of the 32-bit elements of Rs2.
  7775. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7776. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  7777. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  7778. * the 32-bit elements of Rs2.
  7779. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7780. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7781. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  7782. * content of the 32-bit elements of Rs2.
  7783. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
  7784. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7785. * integers.
  7786. *
  7787. * **Operations**:\n
  7788. * ~~~
  7789. * * RV32:
  7790. * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
  7791. * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
  7792. * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
  7793. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7794. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7795. * * RV64:
  7796. * // SMALDS
  7797. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  7798. * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7799. * // SMALDRS
  7800. * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  7801. * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7802. * // SMALXDS
  7803. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  7804. * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7805. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7806. * ~~~
  7807. *
  7808. * \param [in] t long long type of value stored in t
  7809. * \param [in] a unsigned long type of value stored in a
  7810. * \param [in] b unsigned long type of value stored in b
  7811. * \return value stored in long long type
  7812. */
  7813. __STATIC_FORCEINLINE long long __RV_SMALDRS(long long t, unsigned long a, unsigned long b)
  7814. {
  7815. __ASM volatile("smaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is read as the 64-bit accumulator (Rd) and overwritten with the result; a is Rs1, b is Rs2 */
  7816. return t; /* accumulator plus (bottom*bottom - top*top) of each 32-bit element */
  7817. }
  7818. /* ===== Inline Function End for 3.107.2. SMALDRS ===== */
  7819. /* ===== Inline Function Start for 3.107.3. SMALXDS ===== */
  7820. /**
  7821. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  7822. * \brief SMALXDS (Signed Crossed Multiply Two Halfs & Subtract & Add 64-bit)
  7823. * \details
  7824. * **Type**: DSP (64-bit Profile)
  7825. *
  7826. * **Syntax**:\n
  7827. * ~~~
  7828. * SMALDS Rd, Rs1, Rs2
  7829. * SMALDRS Rd, Rs1, Rs2
  7830. * SMALXDS Rd, Rs1, Rs2
  7831. * ~~~
  7832. *
  7833. * **Purpose**:\n
  7834. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  7835. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  7836. * the 64-bit value of an even/odd pair of registers (RV32) or a register (RV64). The addition result is
  7837. * written back to the register-pair.
  7838. * * SMALDS: rt pair + (top*top - bottom*bottom) (all 32-bit elements)
  7839. * * SMALDRS: rt pair + (bottom*bottom - top*top) (all 32-bit elements)
  7840. * * SMALXDS: rt pair + (top*bottom - bottom*top) (all 32-bit elements)
  7841. *
  7842. * **RV32 Description**:\n
  7843. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  7844. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7845. * Rs1 with the top 16-bit content of Rs2.
  7846. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of Rs1 with the top 16-bit content
  7847. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  7848. * with the bottom 16-bit content of Rs2.
  7849. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  7850. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  7851. * Rs1 with the bottom 16-bit content of Rs2.
  7852. * The subtraction result is then added to the 64-bit value of an even/odd pair of registers specified by
  7853. * Rd(4,1). The 64-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and
  7854. * Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  7855. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  7856. * includes register 2d and 2d+1.
  7857. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  7858. * register of the pair contains the low 32-bit of the operand.
  7859. *
  7860. * **RV64 Description**:\n
  7861. * For the `SMALDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7862. * with the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7863. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content
  7864. * of the 32-bit elements of Rs2.
  7865. * For the `SMALDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  7866. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  7867. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  7868. * the 32-bit elements of Rs2.
  7869. * For the `SMALXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  7870. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  7871. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  7872. * content of the 32-bit elements of Rs2.
  7873. * The subtraction results are then added to the 64-bit value of Rd. The 64-bit addition result is written
  7874. * back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed
  7875. * integers.
  7876. *
  7877. * **Operations**:\n
  7878. * ~~~
  7879. * * RV32:
  7880. * Mres[31:0] = (Rs1.H[1] * Rs2.H[1]) - (Rs1.H[0] * Rs2.H[0]); // SMALDS
  7881. * Mres[31:0] = (Rs1.H[0] * Rs2.H[0]) - (Rs1.H[1] * Rs2.H[1]); // SMALDRS
  7882. * Mres[31:0] = (Rs1.H[1] * Rs2.H[0]) - (Rs1.H[0] * Rs2.H[1]); // SMALXDS
  7883. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  7884. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] + SE64(Mres[31:0]);
  7885. * * RV64:
  7886. * // SMALDS
  7887. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]) - (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  7888. * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]) - (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  7889. * // SMALDRS
  7890. * Mres[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]) - (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  7891. * Mres[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]) - (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  7892. * // SMALXDS
  7893. * Mres[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]) - (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  7894. * Mres[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]) - (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  7895. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  7896. * ~~~
  7897. *
  7898. * \param [in] t long long type of value stored in t
  7899. * \param [in] a unsigned long type of value stored in a
  7900. * \param [in] b unsigned long type of value stored in b
  7901. * \return value stored in long long type
  7902. */
  7903. __STATIC_FORCEINLINE long long __RV_SMALXDS(long long t, unsigned long a, unsigned long b)
  7904. {
  7905. __ASM volatile("smalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is read as the 64-bit accumulator (Rd) and overwritten with the result; a is Rs1, b is Rs2 */
  7906. return t; /* accumulator plus (a.top*b.bottom - a.bottom*b.top) of each 32-bit element */
  7907. }
  7908. /* ===== Inline Function End for 3.107.3. SMALXDS ===== */
  7909. /* ===== Inline Function Start for 3.108. SMAR64 ===== */
  7910. /**
  7911. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  7912. * \brief SMAR64 (Signed Multiply and Add to 64-Bit Data)
  7913. * \details
  7914. * **Type**: DSP (64-bit Profile)
  7915. *
  7916. * **Syntax**:\n
  7917. * ~~~
  7918. * SMAR64 Rd, Rs1, Rs2
  7919. * ~~~
  7920. *
  7921. * **Purpose**:\n
  7922. * Multiply the 32-bit signed elements in two registers and add the 64-bit multiplication
  7923. * result to the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is written
  7924. * back to the pair of registers (RV32) or a register (RV64).
  7925. *
  7926. * **RV32 Description**:\n
  7927. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It adds
  7928. * the 64-bit multiplication result to the 64-bit signed data of an even/odd pair of registers specified by
  7929. * Rd(4,1). The addition result is written back to the even/odd pair of registers specified by Rd(4,1).
  7930. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  7931. * includes register 2d and 2d+1.
  7932. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  7933. * of the pair contains the low 32-bit of the result.
  7934. *
  7935. * **RV64 Description**:\n
  7936. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  7937. * adds the 64-bit multiplication results to the 64-bit signed data of Rd. The addition result is written
  7938. * back to Rd.
  7939. *
  7940. * **Operations**:\n
  7941. * ~~~
  7942. * * RV32:
  7943. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  7944. * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
  7945. * * RV64:
  7946. * Rd = Rd + (Rs1.W[0] * Rs2.W[0]) + (Rs1.W[1] * Rs2.W[1]);
  7947. * ~~~
  7948. *
  7949. * \param [in] t long long type of value stored in t
  7950. * \param [in] a long type of value stored in a
  7951. * \param [in] b long type of value stored in b
  7952. * \return value stored in long long type
  7953. */
  7954. __STATIC_FORCEINLINE long long __RV_SMAR64(long long t, long a, long b)
  7955. {
  7956. __ASM volatile("smar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t is read as the 64-bit accumulator (Rd) and overwritten with the sum; a is Rs1, b is Rs2 */
  7957. return t; /* accumulator plus the signed 32x32 product(s): one on RV32, both word lanes on RV64 */
  7958. }
  7959. /* ===== Inline Function End for 3.108. SMAR64 ===== */
  7960. /* ===== Inline Function Start for 3.109. SMAQA ===== */
  7961. /**
  7962. * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
  7963. * \brief SMAQA (Signed Multiply Four Bytes with 32-bit Adds)
  7964. * \details
  7965. * **Type**: Partial-SIMD (Reduction)
  7966. *
  7967. * **Syntax**:\n
  7968. * ~~~
  7969. * SMAQA Rd, Rs1, Rs2
  7970. * ~~~
  7971. *
  7972. * **Purpose**:\n
  7973. * Do four signed 8-bit multiplications from 32-bit chunks of two registers; and then adds
  7974. * the four 16-bit results and the content of corresponding 32-bit chunks of a third register together.
  7975. *
  7976. * **Description**:\n
  7977. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  7978. * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
  7979. * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  7980. * corresponding 32-bit chunks in Rd.
  7981. *
  7982. * **Operations**:\n
  7983. * ~~~
  7984. * res[x] = Rd.W[x] +
  7985. * (Rs1.W[x].B[3] s* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] s* Rs2.W[x].B[2]) +
  7986. * (Rs1.W[x].B[1] s* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] s* Rs2.W[x].B[0]);
  7987. * Rd.W[x] = res[x];
  7988. * for RV32: x=0,
  7989. * for RV64: x=1...0
  7990. * ~~~
  7991. *
  7992. * \param [in] t long type of value stored in t
  7993. * \param [in] a unsigned long type of value stored in a
  7994. * \param [in] b unsigned long type of value stored in b
  7995. * \return value stored in long type
  7996. */
  7997. __STATIC_FORCEINLINE long __RV_SMAQA(long t, unsigned long a, unsigned long b)
  7998. {
  7999. __ASM volatile("smaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t supplies the 32-bit accumulator chunk(s) (Rd) and receives the updated value; a is Rs1, b is Rs2 */
  8000. return t; /* each 32-bit chunk of t plus the four signed 8x8 products of the matching chunks of a and b */
  8001. }
  8002. /* ===== Inline Function End for 3.109. SMAQA ===== */
  8003. /* ===== Inline Function Start for 3.110. SMAQA.SU ===== */
  8004. /**
  8005. * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
  8006. * \brief SMAQA.SU (Signed and Unsigned Multiply Four Bytes with 32-bit Adds)
  8007. * \details
  8008. * **Type**: Partial-SIMD (Reduction)
  8009. *
  8010. * **Syntax**:\n
  8011. * ~~~
  8012. * SMAQA.SU Rd, Rs1, Rs2
  8013. * ~~~
  8014. *
  8015. * **Purpose**:\n
  8016. * Do four `signed x unsigned` 8-bit multiplications from 32-bit chunks of two registers; and
  8017. * then adds the four 16-bit results and the content of corresponding 32-bit chunks of a third register
  8018. * together.
  8019. *
  8020. * **Description**:\n
  8021. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  8022. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
  8023. * signed content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  8024. * corresponding 32-bit chunks in Rd.
  8025. *
  8026. * **Operations**:\n
  8027. * ~~~
  8028. * res[x] = Rd.W[x] +
  8029. * (Rs1.W[x].B[3] su* Rs2.W[x].B[3]) + (Rs1.W[x].B[2] su* Rs2.W[x].B[2]) +
  8030. * (Rs1.W[x].B[1] su* Rs2.W[x].B[1]) + (Rs1.W[x].B[0] su* Rs2.W[x].B[0]);
  8031. * Rd.W[x] = res[x];
  8032. * for RV32: x=0,
  8033. * for RV64: x=1...0
  8034. * ~~~
  8035. *
  8036. * \param [in] t long type of value stored in t
  8037. * \param [in] a unsigned long type of value stored in a
  8038. * \param [in] b unsigned long type of value stored in b
  8039. * \return value stored in long type
  8040. */
  8041. __STATIC_FORCEINLINE long __RV_SMAQA_SU(long t, unsigned long a, unsigned long b)
  8042. {
  8043. __ASM volatile("smaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r": t supplies the 32-bit accumulator chunk(s) (Rd) and receives the updated value; a (Rs1) bytes are signed, b (Rs2) bytes are unsigned */
  8044. return t; /* each 32-bit chunk of t plus the four signed-by-unsigned 8x8 products of the matching chunks */
  8045. }
  8046. /* ===== Inline Function End for 3.110. SMAQA.SU ===== */
  8047. /* ===== Inline Function Start for 3.111. SMAX8 ===== */
  8048. /**
  8049. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  8050. * \brief SMAX8 (SIMD 8-bit Signed Maximum)
  8051. * \details
  8052. * **Type**: SIMD
  8053. *
  8054. * **Syntax**:\n
  8055. * ~~~
  8056. * SMAX8 Rd, Rs1, Rs2
  8057. * ~~~
  8058. *
  8059. * **Purpose**:\n
  8060. * Do 8-bit signed integer elements finding maximum operations simultaneously.
  8061. *
  8062. * **Description**:\n
  8063. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  8064. * signed integer elements in Rs2 and selects the greater number of each pair. The
  8065. * selected results are written to Rd.
  8066. *
  8067. * **Operations**:\n
  8068. * ~~~
  8069. * Rd.B[x] = (Rs1.B[x] > Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  8070. * for RV32: x=3...0,
  8071. * for RV64: x=7...0
  8072. * ~~~
  8073. *
  8074. * \param [in] a unsigned long type of value stored in a
  8075. * \param [in] b unsigned long type of value stored in b
  8076. * \return value stored in unsigned long type
  8077. */
  8078. __STATIC_FORCEINLINE unsigned long __RV_SMAX8(unsigned long a, unsigned long b)
  8079. {
  8080. unsigned long result; /* written only by the asm below ("=r" output), so no initializer is needed */
  8081. __ASM volatile("smax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* operands: result is Rd, a is Rs1, b is Rs2 */
  8082. return result; /* per-byte signed maximum of a and b */
  8083. }
  8084. /* ===== Inline Function End for 3.111. SMAX8 ===== */
  8085. /* ===== Inline Function Start for 3.112. SMAX16 ===== */
  8086. /**
  8087. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  8088. * \brief SMAX16 (SIMD 16-bit Signed Maximum)
  8089. * \details
  8090. * **Type**: SIMD
  8091. *
  8092. * **Syntax**:\n
  8093. * ~~~
  8094. * SMAX16 Rd, Rs1, Rs2
  8095. * ~~~
  8096. *
  8097. * **Purpose**:\n
  8098. * Do 16-bit signed integer elements finding maximum operations simultaneously.
  8099. *
  8100. * **Description**:\n
  8101. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  8102. * signed integer elements in Rs2 and selects the greater number of each pair. The
  8103. * selected results are written to Rd.
  8104. *
  8105. * **Operations**:\n
  8106. * ~~~
  8107. * Rd.H[x] = (Rs1.H[x] > Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  8108. * for RV32: x=1...0,
  8109. * for RV64: x=3...0
  8110. * ~~~
  8111. *
  8112. * \param [in] a unsigned long type of value stored in a
  8113. * \param [in] b unsigned long type of value stored in b
  8114. * \return value stored in unsigned long type
  8115. */
  8116. __STATIC_FORCEINLINE unsigned long __RV_SMAX16(unsigned long a, unsigned long b)
  8117. {
  8118. unsigned long result; /* written only by the asm below ("=r" output), so no initializer is needed */
  8119. __ASM volatile("smax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* operands: result is Rd, a is Rs1, b is Rs2 */
  8120. return result; /* per-halfword signed maximum of a and b */
  8121. }
  8122. /* ===== Inline Function End for 3.112. SMAX16 ===== */
  8123. /* ===== Inline Function Start for 3.113.1. SMBB16 ===== */
  8124. /**
  8125. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8126. * \brief SMBB16 (SIMD Signed Multiply Bottom Half & Bottom Half)
  8127. * \details
  8128. * **Type**: SIMD
  8129. *
  8130. * **Syntax**:\n
  8131. * ~~~
  8132. * SMBB16 Rd, Rs1, Rs2
  8133. * SMBT16 Rd, Rs1, Rs2
  8134. * SMTT16 Rd, Rs1, Rs2
  8135. * ~~~
  8136. *
  8137. * **Purpose**:\n
  8138. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-
  8139. * bit content of the 32-bit elements of another register and write the result to a third register.
  8140. * * SMBB16: W[x].bottom*W[x].bottom
  8141. * * SMBT16: W[x].bottom *W[x].top
  8142. * * SMTT16: W[x].top * W[x].top
  8143. *
  8144. * **Description**:\n
  8145. * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8146. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  8147. * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8148. * with the top 16-bit content of the 32-bit elements of Rs2.
  8149. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8150. * the top 16-bit content of the 32-bit elements of Rs2.
  8151. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  8152. * integers.
  8153. *
  8154. * **Operations**:\n
  8155. * ~~~
  8156. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
  8157. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
  8158. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
  8159. * for RV32: x=0,
  8160. * for RV64: x=1...0
  8161. * ~~~
  8162. *
  8163. * \param [in] a unsigned long type of value stored in a
  8164. * \param [in] b unsigned long type of value stored in b
  8165. * \return value stored in long type
  8166. */
  8167. __STATIC_FORCEINLINE long __RV_SMBB16(unsigned long a, unsigned long b)
  8168. {
  8169. long result; /* written only by the asm below ("=r" output), so no initializer is needed */
  8170. __ASM volatile("smbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* operands: result is Rd, a is Rs1, b is Rs2 */
  8171. return result; /* per 32-bit element: signed product of the bottom halfword of a and the bottom halfword of b */
  8172. }
  8173. /* ===== Inline Function End for 3.113.1. SMBB16 ===== */
  8174. /* ===== Inline Function Start for 3.113.2. SMBT16 ===== */
  8175. /**
  8176. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8177. * \brief SMBT16 (SIMD Signed Multiply Bottom Half & Top Half)
  8178. * \details
  8179. * **Type**: SIMD
  8180. *
  8181. * **Syntax**:\n
  8182. * ~~~
  8183. * SMBB16 Rd, Rs1, Rs2
  8184. * SMBT16 Rd, Rs1, Rs2
  8185. * SMTT16 Rd, Rs1, Rs2
  8186. * ~~~
  8187. *
  8188. * **Purpose**:\n
  8189. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-
  8190. * bit content of the 32-bit elements of another register and write the result to a third register.
  8191. * * SMBB16: W[x].bottom*W[x].bottom
  8192. * * SMBT16: W[x].bottom *W[x].top
  8193. * * SMTT16: W[x].top * W[x].top
  8194. *
  8195. * **Description**:\n
  8196. * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8197. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  8198. * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8199. * with the top 16-bit content of the 32-bit elements of Rs2.
  8200. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8201. * the top 16-bit content of the 32-bit elements of Rs2.
  8202. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  8203. * integers.
  8204. *
  8205. * **Operations**:\n
  8206. * ~~~
  8207. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
  8208. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
  8209. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
  8210. * for RV32: x=0,
  8211. * for RV64: x=1...0
  8212. * ~~~
  8213. *
  8214. * \param [in] a unsigned long type of value stored in a
  8215. * \param [in] b unsigned long type of value stored in b
  8216. * \return value stored in long type
  8217. */
  8218. __STATIC_FORCEINLINE long __RV_SMBT16(unsigned long a, unsigned long b)
  8219. {
  8220. long result;
  8221. __ASM volatile("smbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8222. return result;
  8223. }
  8224. /* ===== Inline Function End for 3.113.2. SMBT16 ===== */
  8225. /* ===== Inline Function Start for 3.113.3. SMTT16 ===== */
  8226. /**
  8227. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8228. * \brief SMTT16 (SIMD Signed Multiply Top Half & Top Half)
  8229. * \details
  8230. * **Type**: SIMD
  8231. *
  8232. * **Syntax**:\n
  8233. * ~~~
  8234. * SMBB16 Rd, Rs1, Rs2
  8235. * SMBT16 Rd, Rs1, Rs2
  8236. * SMTT16 Rd, Rs1, Rs2
  8237. * ~~~
  8238. *
  8239. * **Purpose**:\n
  8240. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-
  8241. * bit content of the 32-bit elements of another register and write the result to a third register.
  8242. * * SMBB16: W[x].bottom*W[x].bottom
  8243. * * SMBT16: W[x].bottom *W[x].top
  8244. * * SMTT16: W[x].top * W[x].top
  8245. *
  8246. * **Description**:\n
  8247. * For the `SMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8248. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  8249. * For the `SMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8250. * with the top 16-bit content of the 32-bit elements of Rs2.
  8251. * For the `SMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8252. * the top 16-bit content of the 32-bit elements of Rs2.
  8253. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed
  8254. * integers.
  8255. *
  8256. * **Operations**:\n
  8257. * ~~~
  8258. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0]; // SMBB16
  8259. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1]; // SMBT16
  8260. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1]; // SMTT16
  8261. * for RV32: x=0,
  8262. * for RV64: x=1...0
  8263. * ~~~
  8264. *
  8265. * \param [in] a unsigned long type of value stored in a
  8266. * \param [in] b unsigned long type of value stored in b
  8267. * \return value stored in long type
  8268. */
  8269. __STATIC_FORCEINLINE long __RV_SMTT16(unsigned long a, unsigned long b)
  8270. {
  8271. long result;
  8272. __ASM volatile("smtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8273. return result;
  8274. }
  8275. /* ===== Inline Function End for 3.113.3. SMTT16 ===== */
  8276. /* ===== Inline Function Start for 3.114.1. SMDS ===== */
  8277. /**
  8278. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8279. * \brief SMDS (SIMD Signed Multiply Two Halfs and Subtract)
  8280. * \details
  8281. * **Type**: SIMD
  8282. *
  8283. * **Syntax**:\n
  8284. * ~~~
  8285. * SMDS Rd, Rs1, Rs2
  8286. * SMDRS Rd, Rs1, Rs2
  8287. * SMXDS Rd, Rs1, Rs2
  8288. * ~~~
  8289. *
  8290. * **Purpose**:\n
  8291. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8292. * perform a subtraction operation between the two 32-bit results.
  8293. * * SMDS: top*top - bottom*bottom (per 32-bit element)
  8294. * * SMDRS: bottom*bottom - top*top (per 32-bit element)
  8295. * * SMXDS: top*bottom - bottom*top (per 32-bit element)
  8296. *
  8297. * **Description**:\n
  8298. * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
  8299. * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
  8300. * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  8301. * 32-bit elements of Rs2.
  8302. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8303. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  8304. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  8305. * the 32-bit elements of Rs2.
  8306. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8307. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  8308. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  8309. * content of the 32-bit elements of Rs2.
  8310. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
  8311. * multiplication are treated as signed integers.
  8312. *
  8313. * **Operations**:\n
  8314. * ~~~
  8315. * * SMDS:
  8316. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  8317. * * SMDRS:
  8318. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  8319. * * SMXDS:
  8320. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  8321. * ~~~
  8322. *
  8323. * \param [in] a unsigned long type of value stored in a
  8324. * \param [in] b unsigned long type of value stored in b
  8325. * \return value stored in long type
  8326. */
  8327. __STATIC_FORCEINLINE long __RV_SMDS(unsigned long a, unsigned long b)
  8328. {
  8329. long result;
  8330. __ASM volatile("smds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8331. return result;
  8332. }
  8333. /* ===== Inline Function End for 3.114.1. SMDS ===== */
  8334. /* ===== Inline Function Start for 3.114.2. SMDRS ===== */
  8335. /**
  8336. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8337. * \brief SMDRS (SIMD Signed Multiply Two Halfs and Reverse Subtract)
  8338. * \details
  8339. * **Type**: SIMD
  8340. *
  8341. * **Syntax**:\n
  8342. * ~~~
  8343. * SMDS Rd, Rs1, Rs2
  8344. * SMDRS Rd, Rs1, Rs2
  8345. * SMXDS Rd, Rs1, Rs2
  8346. * ~~~
  8347. *
  8348. * **Purpose**:\n
  8349. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8350. * perform a subtraction operation between the two 32-bit results.
  8351. * * SMDS: top*top - bottom*bottom (per 32-bit element)
  8352. * * SMDRS: bottom*bottom - top*top (per 32-bit element)
  8353. * * SMXDS: top*bottom - bottom*top (per 32-bit element)
  8354. *
  8355. * **Description**:\n
  8356. * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
  8357. * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
  8358. * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  8359. * 32-bit elements of Rs2.
  8360. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8361. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  8362. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  8363. * the 32-bit elements of Rs2.
  8364. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8365. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  8366. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  8367. * content of the 32-bit elements of Rs2.
  8368. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
  8369. * multiplication are treated as signed integers.
  8370. *
  8371. * **Operations**:\n
  8372. * ~~~
  8373. * * SMDS:
  8374. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  8375. * * SMDRS:
  8376. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  8377. * * SMXDS:
  8378. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  8379. * ~~~
  8380. *
  8381. * \param [in] a unsigned long type of value stored in a
  8382. * \param [in] b unsigned long type of value stored in b
  8383. * \return value stored in long type
  8384. */
  8385. __STATIC_FORCEINLINE long __RV_SMDRS(unsigned long a, unsigned long b)
  8386. {
  8387. long result;
  8388. __ASM volatile("smdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8389. return result;
  8390. }
  8391. /* ===== Inline Function End for 3.114.2. SMDRS ===== */
  8392. /* ===== Inline Function Start for 3.114.3. SMXDS ===== */
  8393. /**
  8394. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_32B_ADDSUB
  8395. * \brief SMXDS (SIMD Signed Crossed Multiply Two Halfs and Subtract)
  8396. * \details
  8397. * **Type**: SIMD
  8398. *
  8399. * **Syntax**:\n
  8400. * ~~~
  8401. * SMDS Rd, Rs1, Rs2
  8402. * SMDRS Rd, Rs1, Rs2
  8403. * SMXDS Rd, Rs1, Rs2
  8404. * ~~~
  8405. *
  8406. * **Purpose**:\n
  8407. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8408. * perform a subtraction operation between the two 32-bit results.
  8409. * * SMDS: top*top - bottom*bottom (per 32-bit element)
  8410. * * SMDRS: bottom*bottom - top*top (per 32-bit element)
  8411. * * SMXDS: top*bottom - bottom*top (per 32-bit element)
  8412. *
  8413. * **Description**:\n
  8414. * For the `SMDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with
  8415. * the bottom 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result
  8416. * of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the
  8417. * 32-bit elements of Rs2.
  8418. * For the `SMDRS` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8419. * the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of
  8420. * multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of
  8421. * the 32-bit elements of Rs2.
  8422. * For the `SMXDS` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8423. * with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the
  8424. * result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit
  8425. * content of the 32-bit elements of Rs2.
  8426. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of
  8427. * multiplication are treated as signed integers.
  8428. *
  8429. * **Operations**:\n
  8430. * ~~~
  8431. * * SMDS:
  8432. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) - (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  8433. * * SMDRS:
  8434. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]);
  8435. * * SMXDS:
  8436. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  8437. * ~~~
  8438. *
  8439. * \param [in] a unsigned long type of value stored in a
  8440. * \param [in] b unsigned long type of value stored in b
  8441. * \return value stored in long type
  8442. */
  8443. __STATIC_FORCEINLINE long __RV_SMXDS(unsigned long a, unsigned long b)
  8444. {
  8445. long result;
  8446. __ASM volatile("smxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8447. return result;
  8448. }
  8449. /* ===== Inline Function End for 3.114.3. SMXDS ===== */
  8450. /* ===== Inline Function Start for 3.115. SMIN8 ===== */
  8451. /**
  8452. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  8453. * \brief SMIN8 (SIMD 8-bit Signed Minimum)
  8454. * \details
  8455. * **Type**: SIMD
  8456. *
  8457. * **Syntax**:\n
  8458. * ~~~
  8459. * SMIN8 Rd, Rs1, Rs2
  8460. * ~~~
  8461. *
  8462. * **Purpose**:\n
  8463. * Do 8-bit signed integer elements finding minimum operations simultaneously.
  8464. *
  8465. * **Description**:\n
  8466. * This instruction compares the 8-bit signed integer elements in Rs1 with the 8-bit
  8467. * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected
  8468. * results are written to Rd.
  8469. *
  8470. * **Operations**:\n
  8471. * ~~~
  8472. * Rd.B[x] = (Rs1.B[x] < Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  8473. * for RV32: x=3...0,
  8474. * for RV64: x=7...0
  8475. * ~~~
  8476. *
  8477. * \param [in] a unsigned long type of value stored in a
  8478. * \param [in] b unsigned long type of value stored in b
  8479. * \return value stored in unsigned long type
  8480. */
  8481. __STATIC_FORCEINLINE unsigned long __RV_SMIN8(unsigned long a, unsigned long b)
  8482. {
  8483. unsigned long result;
  8484. __ASM volatile("smin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8485. return result;
  8486. }
  8487. /* ===== Inline Function End for 3.115. SMIN8 ===== */
  8488. /* ===== Inline Function Start for 3.116. SMIN16 ===== */
  8489. /**
  8490. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  8491. * \brief SMIN16 (SIMD 16-bit Signed Minimum)
  8492. * \details
  8493. * **Type**: SIMD
  8494. *
  8495. * **Syntax**:\n
  8496. * ~~~
  8497. * SMIN16 Rd, Rs1, Rs2
  8498. * ~~~
  8499. *
  8500. * **Purpose**:\n
  8501. * Do 16-bit signed integer elements finding minimum operations simultaneously.
  8502. *
  8503. * **Description**:\n
  8504. * This instruction compares the 16-bit signed integer elements in Rs1 with the 16-bit
  8505. * signed integer elements in Rs2 and selects the numbers that is less than the other one. The selected
  8506. * results are written to Rd.
  8507. *
  8508. * **Operations**:\n
  8509. * ~~~
  8510. * Rd.H[x] = (Rs1.H[x] < Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  8511. * for RV32: x=1...0,
  8512. * for RV64: x=3...0
  8513. * ~~~
  8514. *
  8515. * \param [in] a unsigned long type of value stored in a
  8516. * \param [in] b unsigned long type of value stored in b
  8517. * \return value stored in unsigned long type
  8518. */
  8519. __STATIC_FORCEINLINE unsigned long __RV_SMIN16(unsigned long a, unsigned long b)
  8520. {
  8521. unsigned long result;
  8522. __ASM volatile("smin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8523. return result;
  8524. }
  8525. /* ===== Inline Function End for 3.116. SMIN16 ===== */
  8526. /* ===== Inline Function Start for 3.117.1. SMMUL ===== */
  8527. /**
  8528. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  8529. * \brief SMMUL (SIMD MSW Signed Multiply Word)
  8530. * \details
  8531. * **Type**: SIMD
  8532. *
  8533. * **Syntax**:\n
  8534. * ~~~
  8535. * SMMUL Rd, Rs1, Rs2
  8536. * SMMUL.u Rd, Rs1, Rs2
  8537. * ~~~
  8538. *
  8539. * **Purpose**:\n
  8540. * Multiply the 32-bit signed integer elements of two registers and write the most significant
  8541. * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
  8542. * additional rounding up operation on the multiplication results before taking the most significant
  8543. * 32-bit part of the results.
  8544. *
  8545. * **Description**:\n
  8546. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  8547. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  8548. * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
  8549. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  8550. * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
  8551. *
  8552. * **Operations**:\n
  8553. * ~~~
  8554. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  8555. * if (`.u` form) {
  8556. * Round[x][32:0] = Mres[x][63:31] + 1;
  8557. * Rd.W[x] = Round[x][32:1];
  8558. * } else {
  8559. * Rd.W[x] = Mres[x][63:32];
  8560. * }
  8561. * for RV32: x=0
  8562. * for RV64: x=1...0
  8563. * ~~~
  8564. *
  8565. * \param [in] a long type of value stored in a
  8566. * \param [in] b long type of value stored in b
  8567. * \return value stored in long type
  8568. */
  8569. __STATIC_FORCEINLINE long __RV_SMMUL(long a, long b)
  8570. {
  8571. long result;
  8572. __ASM volatile("smmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8573. return result;
  8574. }
  8575. /* ===== Inline Function End for 3.117.1. SMMUL ===== */
  8576. /* ===== Inline Function Start for 3.117.2. SMMUL.u ===== */
  8577. /**
  8578. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X32_MAC
  8579. * \brief SMMUL.u (SIMD MSW Signed Multiply Word with Rounding)
  8580. * \details
  8581. * **Type**: SIMD
  8582. *
  8583. * **Syntax**:\n
  8584. * ~~~
  8585. * SMMUL Rd, Rs1, Rs2
  8586. * SMMUL.u Rd, Rs1, Rs2
  8587. * ~~~
  8588. *
  8589. * **Purpose**:\n
  8590. * Multiply the 32-bit signed integer elements of two registers and write the most significant
  8591. * 32-bit results to the corresponding 32-bit elements of a register. The `.u` form performs an
  8592. * additional rounding up operation on the multiplication results before taking the most significant
  8593. * 32-bit part of the results.
  8594. *
  8595. * **Description**:\n
  8596. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  8597. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  8598. * elements of Rs1 and Rs2 are treated as signed integers. The `.u` form of the instruction rounds up
  8599. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  8600. * * For `smmul/RV32` instruction, it is an alias to `mulh/RV32` instruction.
  8601. *
  8602. * **Operations**:\n
  8603. * ~~~
  8604. * Mres[x][63:0] = Rs1.W[x] * Rs2.W[x];
  8605. * if (`.u` form) {
  8606. * Round[x][32:0] = Mres[x][63:31] + 1;
  8607. * Rd.W[x] = Round[x][32:1];
  8608. * } else {
  8609. * Rd.W[x] = Mres[x][63:32];
  8610. * }
  8611. * for RV32: x=0
  8612. * for RV64: x=1...0
  8613. * ~~~
  8614. *
  8615. * \param [in] a long type of value stored in a
  8616. * \param [in] b long type of value stored in b
  8617. * \return value stored in long type
  8618. */
  8619. __STATIC_FORCEINLINE long __RV_SMMUL_U(long a, long b)
  8620. {
  8621. long result;
  8622. __ASM volatile("smmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8623. return result;
  8624. }
  8625. /* ===== Inline Function End for 3.117.2. SMMUL.u ===== */
  8626. /* ===== Inline Function Start for 3.118.1. SMMWB ===== */
  8627. /**
  8628. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8629. * \brief SMMWB (SIMD MSW Signed Multiply Word and Bottom Half)
  8630. * \details
  8631. * **Type**: SIMD
  8632. *
  8633. * **Syntax**:\n
  8634. * ~~~
  8635. * SMMWB Rd, Rs1, Rs2
  8636. * SMMWB.u Rd, Rs1, Rs2
  8637. * ~~~
  8638. *
  8639. * **Purpose**:\n
  8640. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  8641. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8642. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8643. * significant discarded bit.
  8644. *
  8645. * **Description**:\n
  8646. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  8647. * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8648. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8649. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8650. *
  8651. * **Operations**:\n
  8652. * ~~~
  8653. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  8654. * if (`.u` form) {
  8655. * Round[x][32:0] = Mres[x][47:15] + 1;
  8656. * Rd.W[x] = Round[x][32:1];
  8657. * } else {
  8658. * Rd.W[x] = Mres[x][47:16];
  8659. * }
  8660. * for RV32: x=0
  8661. * for RV64: x=1...0
  8662. * ~~~
  8663. *
  8664. * \param [in] a long type of value stored in a
  8665. * \param [in] b unsigned long type of value stored in b
  8666. * \return value stored in long type
  8667. */
  8668. __STATIC_FORCEINLINE long __RV_SMMWB(long a, unsigned long b)
  8669. {
  8670. long result;
  8671. __ASM volatile("smmwb %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8672. return result;
  8673. }
  8674. /* ===== Inline Function End for 3.118.1. SMMWB ===== */
  8675. /* ===== Inline Function Start for 3.118.2. SMMWB.u ===== */
  8676. /**
  8677. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8678. * \brief SMMWB.u (SIMD MSW Signed Multiply Word and Bottom Half with Rounding)
  8679. * \details
  8680. * **Type**: SIMD
  8681. *
  8682. * **Syntax**:\n
  8683. * ~~~
  8684. * SMMWB Rd, Rs1, Rs2
  8685. * SMMWB.u Rd, Rs1, Rs2
  8686. * ~~~
  8687. *
  8688. * **Purpose**:\n
  8689. * Multiply the signed 32-bit integer elements of one register and the bottom 16-bit of the
  8690. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8691. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8692. * significant discarded bit.
  8693. *
  8694. * **Description**:\n
  8695. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed bottom 16-bit content
  8696. * of the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8697. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8698. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8699. *
  8700. * **Operations**:\n
  8701. * ~~~
  8702. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[0];
  8703. * if (`.u` form) {
  8704. * Round[x][32:0] = Mres[x][47:15] + 1;
  8705. * Rd.W[x] = Round[x][32:1];
  8706. * } else {
  8707. * Rd.W[x] = Mres[x][47:16];
  8708. * }
  8709. * for RV32: x=0
  8710. * for RV64: x=1...0
  8711. * ~~~
  8712. *
  8713. * \param [in] a long type of value stored in a
  8714. * \param [in] b unsigned long type of value stored in b
  8715. * \return value stored in long type
  8716. */
  8717. __STATIC_FORCEINLINE long __RV_SMMWB_U(long a, unsigned long b)
  8718. {
  8719. long result;
  8720. __ASM volatile("smmwb.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8721. return result;
  8722. }
  8723. /* ===== Inline Function End for 3.118.2. SMMWB.u ===== */
  8724. /* ===== Inline Function Start for 3.119.1. SMMWT ===== */
  8725. /**
  8726. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8727. * \brief SMMWT (SIMD MSW Signed Multiply Word and Top Half)
  8728. * \details
  8729. * **Type**: SIMD
  8730. *
  8731. * **Syntax**:\n
  8732. * ~~~
  8733. * SMMWT Rd, Rs1, Rs2
  8734. * SMMWT.u Rd, Rs1, Rs2
  8735. * ~~~
  8736. *
  8737. * **Purpose**:\n
  8738. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  8739. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8740. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8741. * significant discarded bit.
  8742. *
  8743. * **Description**:\n
  8744. * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
  8745. * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8746. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8747. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8748. *
  8749. * **Operations**:\n
  8750. * ~~~
  8751. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  8752. * if (`.u` form) {
  8753. * Round[x][32:0] = Mres[x][47:15] + 1;
  8754. * Rd.W[x] = Round[x][32:1];
  8755. * } else {
  8756. * Rd.W[x] = Mres[x][47:16];
  8757. * }
  8758. * for RV32: x=0
  8759. * for RV64: x=1...0
  8760. * ~~~
  8761. *
  8762. * \param [in] a long type of value stored in a
  8763. * \param [in] b unsigned long type of value stored in b
  8764. * \return value stored in long type
  8765. */
  8766. __STATIC_FORCEINLINE long __RV_SMMWT(long a, unsigned long b)
  8767. {
  8768. long result;
  8769. __ASM volatile("smmwt %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8770. return result;
  8771. }
  8772. /* ===== Inline Function End for 3.119.1. SMMWT ===== */
  8773. /* ===== Inline Function Start for 3.119.2. SMMWT.u ===== */
  8774. /**
  8775. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_MSW_32X16_MAC
  8776. * \brief SMMWT.u (SIMD MSW Signed Multiply Word and Top Half with Rounding)
  8777. * \details
  8778. * **Type**: SIMD
  8779. *
  8780. * **Syntax**:\n
  8781. * ~~~
  8782. * SMMWT Rd, Rs1, Rs2
  8783. * SMMWT.u Rd, Rs1, Rs2
  8784. * ~~~
  8785. *
  8786. * **Purpose**:\n
  8787. * Multiply the signed 32-bit integer elements of one register and the top 16-bit of the
  8788. * corresponding 32-bit elements of another register, and write the most significant 32-bit results to
  8789. * the corresponding 32-bit elements of a register. The `.u` form rounds up the results from the most
  8790. * significant discarded bit.
  8791. *
  8792. * **Description**:\n
  8793. * This instruction multiplies the signed 32-bit elements of Rs1 with the top signed 16-bit content of
  8794. * the corresponding 32-bit elements of Rs2 and writes the most significant 32-bit multiplication
  8795. * results to the corresponding 32-bit elements of Rd. The `.u` form of the instruction rounds up the
  8796. * most significant 32-bit of the 48-bit multiplication results by adding a 1 to bit 15 of the results.
  8797. *
  8798. * **Operations**:\n
  8799. * ~~~
  8800. * Mres[x][47:0] = Rs1.W[x] * Rs2.W[x].H[1];
  8801. * if (`.u` form) {
  8802. * Round[x][32:0] = Mres[x][47:15] + 1;
  8803. * Rd.W[x] = Round[x][32:1];
  8804. * } else {
  8805. * Rd.W[x] = Mres[x][47:16];
  8806. * }
  8807. * for RV32: x=0
  8808. * for RV64: x=1...0
  8809. * ~~~
  8810. *
  8811. * \param [in] a long type of value stored in a
  8812. * \param [in] b unsigned long type of value stored in b
  8813. * \return value stored in long type
  8814. */
  8815. __STATIC_FORCEINLINE long __RV_SMMWT_U(long a, unsigned long b)
  8816. {
  8817. long result;
  8818. __ASM volatile("smmwt.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  8819. return result;
  8820. }
  8821. /* ===== Inline Function End for 3.119.2. SMMWT.u ===== */
  8822. /* ===== Inline Function Start for 3.120.1. SMSLDA ===== */
  8823. /**
  8824. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  8825. * \brief SMSLDA (Signed Multiply Two Halfs & Add & Subtract 64-bit)
  8826. * \details
  8827. * **Type**: DSP (64-bit Profile)
  8828. *
  8829. * **Syntax**:\n
  8830. * ~~~
  8831. * SMSLDA Rd, Rs1, Rs2
  8832. * SMSLXDA Rd, Rs1, Rs2
  8833. * ~~~
  8834. *
  8835. * **Purpose**:\n
  8836. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8837. * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
  8838. * register (RV64). The subtraction result is written back to the register-pair.
  8839. * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
  8840. * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
  8841. *
  8842. * **RV32 Description**:\n
  8843. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  8844. * content Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8845. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  8846. * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8847. * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
  8848. * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
  8849. * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  8850. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  8851. * includes register 2d and 2d+1.
  8852. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  8853. * of the pair contains the low 32-bit of the result.
  8854. *
  8855. * **RV64 Description**:\n
  8856. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8857. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  8858. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8859. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8860. * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
  8861. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8862. * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
  8863. * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
  8864. * as signed integers.
  8865. *
  8866. * **Operations**:\n
  8867. * ~~~
  8868. * * RV32:
  8869. * // SMSLDA
  8870. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  8871. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  8872. * // SMSLXDA
  8873. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  8874. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  8875. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  8876. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
  8877. * * RV64:
  8878. * // SMSLDA
  8879. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  8880. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  8881. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  8882. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  8883. * // SMSLXDA
  8884. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  8885. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  8886. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  8887. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  8888. * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
  8889. * SE64(Mres1[1][31:0]);
  8890. * ~~~
  8891. *
  8892. * \param [in] t long long type of value stored in t
  8893. * \param [in] a unsigned long type of value stored in a
  8894. * \param [in] b unsigned long type of value stored in b
  8895. * \return value stored in long long type
  8896. */
  8897. __STATIC_FORCEINLINE long long __RV_SMSLDA(long long t, unsigned long a, unsigned long b)
  8898. {
  8899. __ASM volatile("smslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  8900. return t;
  8901. }
  8902. /* ===== Inline Function End for 3.120.1. SMSLDA ===== */
  8903. /* ===== Inline Function Start for 3.120.2. SMSLXDA ===== */
  8904. /**
  8905. * \ingroup NMSIS_Core_DSP_Intrinsic_SIGNED_16B_MULT_64B_ADDSUB
  8906. * \brief SMSLXDA (Signed Crossed Multiply Two Halfs & Add & Subtract 64-bit)
  8907. * \details
  8908. * **Type**: DSP (64-bit Profile)
  8909. *
  8910. * **Syntax**:\n
  8911. * ~~~
  8912. * SMSLDA Rd, Rs1, Rs2
  8913. * SMSLXDA Rd, Rs1, Rs2
  8914. * ~~~
  8915. *
  8916. * **Purpose**:\n
  8917. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then
  8918. * subtracts the two 32-bit results from the 64-bit value of an even/odd pair of registers (RV32) or a
  8919. * register (RV64). The subtraction result is written back to the register-pair.
  8920. * * SMSLDA: rd pair - top*top - bottom*bottom (all 32-bit elements)
  8921. * * SMSLXDA: rd pair - top*bottom - bottom*top (all 32-bit elements)
  8922. *
  8923. * **RV32 Description**:\n
  8924. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  8925. * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8926. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  8927. * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
  8928. * The two multiplication results are subtracted from the 64-bit value of an even/odd pair of registers
  8929. * specified by Rd(4,1). The 64-bit subtraction result is written back to the register-pair. The 16-bit
  8930. * values of Rs1 and Rs2, and the 64-bit value of the register-pair are treated as signed integers.
  8931. * Rd(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register pair
  8932. * includes register 2d and 2d+1.
  8933. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  8934. * of the pair contains the low 32-bit of the result.
  8935. *
  8936. * **RV64 Description**:\n
  8937. * For the `SMSLDA` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  8938. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  8939. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8940. * For the `SMSLXDA` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with
  8941. * the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the bottom 16-bit content of
  8942. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  8943. * The four multiplication results are subtracted from the 64-bit value of Rd. The 64-bit subtraction
  8944. * result is written back to Rd. The 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated
  8945. * as signed integers.
  8946. *
  8947. * **Operations**:\n
  8948. * ~~~
  8949. * * RV32:
  8950. * // SMSLDA
  8951. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[0]);
  8952. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[1]);
  8953. * // SMSLXDA
  8954. * Mres0[31:0] = (Rs1.H[0] * Rs2.H[1]);
  8955. * Mres1[31:0] = (Rs1.H[1] * Rs2.H[0]);
  8956. * Idx0 = CONCAT(Rd(4,1),1'b0); Idx1 = CONCAT(Rd(4,1),1'b1);
  8957. * R[Idx1].R[Idx0] = R[Idx1].R[Idx0] - SE64(Mres0[31:0]) - SE64(Mres1[31:0]);
  8958. * * RV64:
  8959. * // SMSLDA
  8960. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  8961. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  8962. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  8963. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  8964. * // SMSLXDA
  8965. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  8966. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  8967. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  8968. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  8969. * Rd = Rd - SE64(Mres0[0][31:0]) - SE64(Mres1[0][31:0]) - SE64(Mres0[1][31:0]) -
  8970. * SE64(Mres1[1][31:0]);
  8971. * ~~~
  8972. *
  8973. * \param [in] t long long type of value stored in t
  8974. * \param [in] a unsigned long type of value stored in a
  8975. * \param [in] b unsigned long type of value stored in b
  8976. * \return value stored in long long type
  8977. */
  8978. __STATIC_FORCEINLINE long long __RV_SMSLXDA(long long t, unsigned long a, unsigned long b)
  8979. {
  8980. __ASM volatile("smslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  8981. return t;
  8982. }
  8983. /* ===== Inline Function End for 3.120.2. SMSLXDA ===== */
  8984. /* ===== Inline Function Start for 3.121. SMSR64 ===== */
  8985. /**
  8986. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  8987. * \brief SMSR64 (Signed Multiply and Subtract from 64-Bit Data)
  8988. * \details
  8989. * **Type**: DSP (64-bit Profile)
  8990. *
  8991. * **Syntax**:\n
  8992. * ~~~
  8993. * SMSR64 Rd, Rs1, Rs2
  8994. * ~~~
  8995. *
  8996. * **Purpose**:\n
  8997. * Multiply the 32-bit signed elements in two registers and subtract the 64-bit multiplication
  8998. * results from the 64-bit signed data of a pair of registers (RV32) or a register (RV64). The result is
  8999. * written back to the pair of registers (RV32) or a register (RV64).
  9000. *
  9001. * **RV32 Description**:\n
  9002. * This instruction multiplies the 32-bit signed data of Rs1 with that of Rs2. It
  9003. * subtracts the 64-bit multiplication result from the 64-bit signed data of an even/odd pair of registers
  9004. * specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
  9005. * specified by Rd(4,1).
  9006. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  9007. * includes register 2d and 2d+1.
  9008. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  9009. * of the pair contains the low 32-bit of the result.
  9010. *
  9011. * **RV64 Description**:\n
  9012. * This instruction multiplies the 32-bit signed elements of Rs1 with that of Rs2. It
  9013. * subtracts the 64-bit multiplication results from the 64-bit signed data of Rd. The subtraction result is
  9014. * written back to Rd.
  9015. *
  9016. * **Operations**:\n
  9017. * ~~~
  9018. * * RV32:
  9019. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9020. * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
  9021. * * RV64:
  9022. * Rd = Rd - (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]);
  9023. * ~~~
  9024. *
  9025. * \param [in] t long long type of value stored in t
  9026. * \param [in] a long type of value stored in a
  9027. * \param [in] b long type of value stored in b
  9028. * \return value stored in long long type
  9029. */
  9030. __STATIC_FORCEINLINE long long __RV_SMSR64(long long t, long a, long b)
  9031. {
  9032. __ASM volatile("smsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  9033. return t;
  9034. }
  9035. /* ===== Inline Function End for 3.121. SMSR64 ===== */
  9036. /* ===== Inline Function Start for 3.122.1. SMUL8 ===== */
  9037. /**
  9038. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  9039. * \brief SMUL8 (SIMD Signed 8-bit Multiply)
  9040. * \details
  9041. * **Type**: SIMD
  9042. *
  9043. * **Syntax**:\n
  9044. * ~~~
  9045. * SMUL8 Rd, Rs1, Rs2
  9046. * SMULX8 Rd, Rs1, Rs2
  9047. * ~~~
  9048. *
  9049. * **Purpose**:\n
  9050. * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
  9051. *
  9052. * **RV32 Description**:\n
  9053. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9054. * corresponding 8-bit data elements of Rs2.
  9055. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9056. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9057. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9058. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  9059. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  9060. * includes register 2d and 2d+1.
  9061. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  9062. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  9063. * part of Rs1.
  9064. *
  9065. * **RV64 Description**:\n
  9066. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9067. * corresponding 8-bit data elements of Rs2.
  9068. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9069. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9070. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9071. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  9072. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  9073. * the bottom part of Rs1.
  9074. *
  9075. * **Operations**:\n
  9076. * ~~~
  9077. * * RV32:
  9078. * if (is `SMUL8`) {
  9079. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9080. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9081. * } else if (is `SMULX8`) {
  9082. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9083. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9084. * }
  9085. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9086. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9087. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9088. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  9089. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  9090. * x = 0 and 2
  9091. * * RV64:
  9092. * if (is `SMUL8`) {
  9093. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9094. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9095. * } else if (is `SMULX8`) {
  9096. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9097. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9098. * }
  9099. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9100. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9101. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9102. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  9103. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
  9104. * x = 0 and 2
  9105. * ~~~
  9106. *
  9107. * \param [in] a unsigned int type of value stored in a
  9108. * \param [in] b unsigned int type of value stored in b
  9109. * \return value stored in unsigned long long type
  9110. */
  9111. __STATIC_FORCEINLINE unsigned long long __RV_SMUL8(unsigned int a, unsigned int b)
  9112. {
  9113. unsigned long long result;
  9114. __ASM volatile("smul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9115. return result;
  9116. }
  9117. /* ===== Inline Function End for 3.122.1. SMUL8 ===== */
  9118. /* ===== Inline Function Start for 3.122.2. SMULX8 ===== */
  9119. /**
  9120. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  9121. * \brief SMULX8 (SIMD Signed Crossed 8-bit Multiply)
  9122. * \details
  9123. * **Type**: SIMD
  9124. *
  9125. * **Syntax**:\n
  9126. * ~~~
  9127. * SMUL8 Rd, Rs1, Rs2
  9128. * SMULX8 Rd, Rs1, Rs2
  9129. * ~~~
  9130. *
  9131. * **Purpose**:\n
  9132. * Do signed 8-bit multiplications and generate four 16-bit results simultaneously.
  9133. *
  9134. * **RV32 Description**:\n
  9135. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9136. * corresponding 8-bit data elements of Rs2.
  9137. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9138. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9139. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9140. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  9141. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  9142. * includes register 2d and 2d+1.
  9143. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  9144. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  9145. * part of Rs1.
  9146. *
  9147. * **RV64 Description**:\n
  9148. * For the `SMUL8` instruction, multiply the 8-bit data elements of Rs1 with the
  9149. * corresponding 8-bit data elements of Rs2.
  9150. * For the `SMULX8` instruction, multiply the first and second 8-bit data elements of Rs1 with the
  9151. * second and first 8-bit data elements of Rs2. At the same time, multiply the third and fourth 8-bit data
  9152. * elements of Rs1 with the fourth and third 8-bit data elements of Rs2.
  9153. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  9154. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  9155. * the bottom part of Rs1.
  9156. *
  9157. * **Operations**:\n
  9158. * ~~~
  9159. * * RV32:
  9160. * if (is `SMUL8`) {
  9161. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9162. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9163. * } else if (is `SMULX8`) {
  9164. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9165. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9166. * }
  9167. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9168. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9169. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9170. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  9171. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  9172. * x = 0 and 2
  9173. * * RV64:
  9174. * if (is `SMUL8`) {
  9175. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  9176. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  9177. * } else if (is `SMULX8`) {
  9178. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  9179. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  9180. * }
  9181. * rest[x/2] = op1t[x/2] s* op2t[x/2];
  9182. * resb[x/2] = op1b[x/2] s* op2b[x/2];
  9183. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9184. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  9185. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0];
  9186. * x = 0 and 2
  9187. * ~~~
  9188. *
  9189. * \param [in] a unsigned int type of value stored in a
  9190. * \param [in] b unsigned int type of value stored in b
  9191. * \return value stored in unsigned long long type
  9192. */
  9193. __STATIC_FORCEINLINE unsigned long long __RV_SMULX8(unsigned int a, unsigned int b)
  9194. {
  9195. unsigned long long result;
  9196. __ASM volatile("smulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9197. return result;
  9198. }
  9199. /* ===== Inline Function End for 3.122.2. SMULX8 ===== */
  9200. /* ===== Inline Function Start for 3.123.1. SMUL16 ===== */
  9201. /**
  9202. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  9203. * \brief SMUL16 (SIMD Signed 16-bit Multiply)
  9204. * \details
  9205. * **Type**: SIMD
  9206. *
  9207. * **Syntax**:\n
  9208. * ~~~
  9209. * SMUL16 Rd, Rs1, Rs2
  9210. * SMULX16 Rd, Rs1, Rs2
  9211. * ~~~
  9212. *
  9213. * **Purpose**:\n
  9214. * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
  9215. *
  9216. * **RV32 Description**:\n
  9217. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
  9218. * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
  9219. * with the bottom 16-bit Q15 content of Rs2.
  9220. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
  9221. * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top
  9222. * 16-bit Q15 content of Rs2.
  9223. * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  9224. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  9225. * register 2d and 2d+1.
  9226. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  9227. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  9228. *
  9229. * **RV64 Description**:\n
  9230. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
  9231. * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
  9232. * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
  9233. * content of the lower 32-bit word in Rs2.
  9234. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
  9235. * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
  9236. * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
  9237. * lower 32-bit word in Rs2.
  9238. * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
  9239. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  9240. * the lower 32-bit word in Rs1 is written to Rd.W[0].
  9241. *
  9242. * **Operations**:\n
  9243. * ~~~
  9244. * * RV32:
  9245. * if (is `SMUL16`) {
  9246. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9247. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9248. * } else if (is `SMULX16`) {
  9249. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9250. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9251. * }
  9252. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9253. * res = aop s* bop;
  9254. * }
  9255. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9256. * R[t_H] = rest;
  9257. * R[t_L] = resb;
  9258. * * RV64:
  9259. * if (is `SMUL16`) {
  9260. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9261. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9262. * } else if (is `SMULX16`) {
  9263. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9264. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9265. * }
  9266. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9267. * res = aop s* bop;
  9268. * }
  9269. * Rd.W[1] = rest;
  9270. * Rd.W[0] = resb;
  9271. * ~~~
  9272. *
  9273. * \param [in] a unsigned int type of value stored in a
  9274. * \param [in] b unsigned int type of value stored in b
  9275. * \return value stored in unsigned long long type
  9276. */
  9277. __STATIC_FORCEINLINE unsigned long long __RV_SMUL16(unsigned int a, unsigned int b)
  9278. {
  9279. unsigned long long result;
  9280. __ASM volatile("smul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9281. return result;
  9282. }
  9283. /* ===== Inline Function End for 3.123.1. SMUL16 ===== */
  9284. /* ===== Inline Function Start for 3.123.2. SMULX16 ===== */
  9285. /**
  9286. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  9287. * \brief SMULX16 (SIMD Signed Crossed 16-bit Multiply)
  9288. * \details
  9289. * **Type**: SIMD
  9290. *
  9291. * **Syntax**:\n
  9292. * ~~~
  9293. * SMUL16 Rd, Rs1, Rs2
  9294. * SMULX16 Rd, Rs1, Rs2
  9295. * ~~~
  9296. *
  9297. * **Purpose**:\n
  9298. * Do signed 16-bit multiplications and generate two 32-bit results simultaneously.
  9299. *
  9300. * **RV32 Description**:\n
  9301. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of Rs1 with
  9302. * the top 16-bit Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1
  9303. * with the bottom 16-bit Q15 content of Rs2.
  9304. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of Rs1 with the bottom 16-bit
  9305. * Q15 content of Rs2. At the same time, multiply the bottom 16-bit Q15 content of Rs1 with the top
  9306. * 16-bit Q15 content of Rs2.
  9307. * The two Q30 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  9308. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  9309. * register 2d and 2d+1.
  9310. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  9311. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  9312. *
  9313. * **RV64 Description**:\n
  9314. * For the `SMUL16` instruction, multiply the top 16-bit Q15 content of the lower
  9315. * 32-bit word in Rs1 with the top 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time,
  9316. * multiply the bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the bottom 16-bit Q15
  9317. * content of the lower 32-bit word in Rs2.
  9318. * For the `SMULX16` instruction, multiply the top 16-bit Q15 content of the lower 32-bit word in Rs1
  9319. * with the bottom 16-bit Q15 content of the lower 32-bit word in Rs2. At the same time, multiply the
  9320. * bottom 16-bit Q15 content of the lower 32-bit word in Rs1 with the top 16-bit Q15 content of the
  9321. * lower 32-bit word in Rs2.
  9322. * The two 32-bit Q30 results are then written into Rd. The result calculated from the top 16-bit of the
  9323. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  9324. * the lower 32-bit word in Rs1 is written to Rd.W[0].
  9325. *
  9326. * **Operations**:\n
  9327. * ~~~
  9328. * * RV32:
  9329. * if (is `SMUL16`) {
  9330. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9331. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9332. * } else if (is `SMULX16`) {
  9333. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9334. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9335. * }
  9336. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9337. * res = aop s* bop;
  9338. * }
  9339. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  9340. * R[t_H] = rest;
  9341. * R[t_L] = resb;
  9342. * * RV64:
  9343. * if (is `SMUL16`) {
  9344. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  9345. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  9346. * } else if (is `SMULX16`) {
  9347. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  9348. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  9349. * }
  9350. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  9351. * res = aop s* bop;
  9352. * }
  9353. * Rd.W[1] = rest;
  9354. * Rd.W[0] = resb;
  9355. * ~~~
  9356. *
  9357. * \param [in] a unsigned int type of value stored in a
  9358. * \param [in] b unsigned int type of value stored in b
  9359. * \return value stored in unsigned long long type
  9360. */
  9361. __STATIC_FORCEINLINE unsigned long long __RV_SMULX16(unsigned int a, unsigned int b)
  9362. {
  9363. unsigned long long result;
  9364. __ASM volatile("smulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9365. return result;
  9366. }
  9367. /* ===== Inline Function End for 3.123.2. SMULX16 ===== */
  9368. /* ===== Inline Function Start for 3.124. SRA.u ===== */
  9369. /**
  9370. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  9371. * \brief SRA.u (Rounding Shift Right Arithmetic)
  9372. * \details
  9373. * **Type**: DSP
  9374. *
  9375. * **Syntax**:\n
  9376. * ~~~
  9377. * SRA.u Rd, Rs1, Rs2
  9378. * ~~~
  9379. *
  9380. * **Purpose**:\n
  9381. * Perform an arithmetic right shift operation with rounding. The shift amount is a variable
  9382. * from a GPR.
  9383. *
  9384. * **Description**:\n
  9385. * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
  9386. * filled with the sign-bit and the shift amount is specified by the low-order 5-bits (RV32) or 6-bits
  9387. * (RV64) of the Rs2 register. For the rounding operation, a value of 1 is added to the most significant
  9388. * discarded bit of the data to calculate the final result. And the result is written to Rd.
  9389. *
  9390. * **Operations**:\n
  9391. * ~~~
  9392. * * RV32:
  9393. * sa = Rs2[4:0];
  9394. * if (sa > 0) {
  9395. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  9396. * Rd = res[31:0];
  9397. * } else {
  9398. * Rd = Rs1;
  9399. * }
  9400. * * RV64:
  9401. * sa = Rs2[5:0];
  9402. * if (sa > 0) {
  9403. * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
  9404. * Rd = res[63:0];
  9405. * } else {
  9406. * Rd = Rs1;
  9407. * }
  9408. * ~~~
  9409. *
  9410. * \param [in] a long type of value stored in a
  9411. * \param [in] b unsigned int type of value stored in b
  9412. * \return value stored in long type
  9413. */
  9414. __STATIC_FORCEINLINE long __RV_SRA_U(long a, unsigned int b)
  9415. {
  9416. long result;
  9417. __ASM volatile("sra.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9418. return result;
  9419. }
  9420. /* ===== Inline Function End for 3.124. SRA.u ===== */
  9421. /* ===== Inline Function Start for 3.125. SRAI.u ===== */
  9422. /**
  9423. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  9424. * \brief SRAI.u (Rounding Shift Right Arithmetic Immediate)
  9425. * \details
  9426. * **Type**: DSP
  9427. *
  9428. * **Syntax**:\n
  9429. * ~~~
  9430. * SRAI.u Rd, Rs1, imm6u[4:0] (RV32)
  9431. * SRAI.u Rd, Rs1, imm6u[5:0] (RV64)
  9432. * ~~~
  9433. *
  9434. * **Purpose**:\n
  9435. * Perform an arithmetic right shift operation with rounding. The shift amount is an
  9436. * immediate value.
  9437. *
  9438. * **Description**:\n
  9439. * This instruction right-shifts the content of Rs1 arithmetically. The shifted out bits are
  9440. * filled with the sign-bit and the shift amount is specified by the imm6u[4:0] (RV32) or imm6u[5:0]
  9441. * (RV64) constant. For the rounding operation, a value of 1 is added to the most significant discarded
  9442. * bit of the data to calculate the final result. And the result is written to Rd.
  9443. *
  9444. * **Operations**:\n
  9445. * ~~~
  9446. * * RV32:
  9447. * sa = imm6u[4:0];
  9448. * if (sa > 0) {
  9449. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  9450. * Rd = res[31:0];
  9451. * } else {
  9452. * Rd = Rs1;
  9453. * }
  9454. * * RV64:
  9455. * sa = imm6u[5:0];
  9456. * if (sa > 0) {
  9457. * res[63:-1] = SE65(Rs1[63:(sa-1)]) + 1;
  9458. * Rd = res[63:0];
  9459. * } else {
  9460. * Rd = Rs1;
  9461. * }
  9462. * ~~~
  9463. *
  9464. * \param [in] a long type of value stored in a
  9465. * \param [in] b unsigned int type of value stored in b
  9466. * \return value stored in long type
  9467. */
  /*
   * Statement-expression wrapper around the SRAI.u instruction.
   *
   *   a - value to shift; converted to long exactly once, before any other
   *       macro-local variable is declared, so an argument that happens to be
   *       named like one of the locals cannot be shadowed by it.
   *   b - shift amount; bound with the "K" operand constraint, so it is
   *       expected to be a compile-time constant.
   *       NOTE(review): GCC documents the RISC-V "K" constraint as a 5-bit
   *       unsigned immediate, which would not cover RV64 shift amounts
   *       32..63 - confirm against the toolchain in use.
   *
   * The value of the expression is the long result written by the instruction.
   */
  #define __RV_SRAI_U(a, b) \
      ({ \
          long __srai_u_src = (long)(a); \
          long __srai_u_res; \
          __ASM volatile("srai.u %0, %1, %2" : "=r"(__srai_u_res) : "r"(__srai_u_src), "K"(b)); \
          __srai_u_res; \
      })
  9475. /* ===== Inline Function End for 3.125. SRAI.u ===== */
  9476. /* ===== Inline Function Start for 3.126.1. SRA8 ===== */
  9477. /**
  9478. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9479. * \brief SRA8 (SIMD 8-bit Shift Right Arithmetic)
  9480. * \details
  9481. * **Type**: SIMD
  9482. *
  9483. * **Syntax**:\n
  9484. * ~~~
  9485. * SRA8 Rd, Rs1, Rs2
  9486. * SRA8.u Rd, Rs1, Rs2
  9487. * ~~~
  9488. *
  9489. * **Purpose**:\n
  9490. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9491. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9492. * results.
  9493. *
  9494. * **Description**:\n
  9495. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9496. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9497. * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9498. * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
  9499. * And the results are written to Rd.
  9500. *
  9501. * **Operations**:\n
  9502. * ~~~
  9503. * sa = Rs2[2:0];
  9504. * if (sa > 0) {
  9505. * if (`.u` form) { // SRA8.u
  9506. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9507. * Rd.B[x] = res[7:0];
  9508. * } else { // SRA8
  9509. * Rd.B[x] = SE8(Rs1.B[x][7:sa])
  9510. * }
  9511. * } else {
  9512. * Rd = Rs1;
  9513. * }
  9514. * for RV32: x=3...0,
  9515. * for RV64: x=7...0
  9516. * ~~~
  9517. *
  9518. * \param [in] a unsigned long type of value stored in a
  9519. * \param [in] b unsigned int type of value stored in b
  9520. * \return value stored in unsigned long type
  9521. */
  9522. __STATIC_FORCEINLINE unsigned long __RV_SRA8(unsigned long a, unsigned int b)
  9523. {
  9524. unsigned long result;
  9525. __ASM volatile("sra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9526. return result;
  9527. }
  9528. /* ===== Inline Function End for 3.126.1. SRA8 ===== */
  9529. /* ===== Inline Function Start for 3.126.2. SRA8.u ===== */
  9530. /**
  9531. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9532. * \brief SRA8.u (SIMD 8-bit Rounding Shift Right Arithmetic)
  9533. * \details
  9534. * **Type**: SIMD
  9535. *
  9536. * **Syntax**:\n
  9537. * ~~~
  9538. * SRA8 Rd, Rs1, Rs2
  9539. * SRA8.u Rd, Rs1, Rs2
  9540. * ~~~
  9541. *
  9542. * **Purpose**:\n
  9543. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9544. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9545. * results.
  9546. *
  9547. * **Description**:\n
  9548. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9549. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9550. * 3-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9551. * added to the most significant discarded bit of each 8-bit data element to calculate the final results.
  9552. * And the results are written to Rd.
  9553. *
  9554. * **Operations**:\n
  9555. * ~~~
  9556. * sa = Rs2[2:0];
  9557. * if (sa > 0) {
  9558. * if (`.u` form) { // SRA8.u
  9559. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9560. * Rd.B[x] = res[7:0];
  9561. * } else { // SRA8
  9562. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  9563. * }
  9564. * } else {
  9565. * Rd = Rs1;
  9566. * }
  9567. * for RV32: x=3...0,
  9568. * for RV64: x=7...0
  9569. * ~~~
  9570. *
  9571. * \param [in] a unsigned long type of value stored in a
  9572. * \param [in] b unsigned int type of value stored in b
  9573. * \return value stored in unsigned long type
  9574. */
  9575. __STATIC_FORCEINLINE unsigned long __RV_SRA8_U(unsigned long a, unsigned int b)
  9576. {
  9577. unsigned long result;
  9578. __ASM volatile("sra8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9579. return result;
  9580. }
  9581. /* ===== Inline Function End for 3.126.2. SRA8.u ===== */
  9582. /* ===== Inline Function Start for 3.127.1. SRAI8 ===== */
  9583. /**
  9584. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9585. * \brief SRAI8 (SIMD 8-bit Shift Right Arithmetic Immediate)
  9586. * \details
  9587. * **Type**: SIMD
  9588. *
  9589. * **Syntax**:\n
  9590. * ~~~
  9591. * SRAI8 Rd, Rs1, imm3u
  9592. * SRAI8.u Rd, Rs1, imm3u
  9593. * ~~~
  9594. *
  9595. * **Purpose**:\n
  9596. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
  9597. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  9598. *
  9599. * **Description**:\n
  9600. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9601. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
  9602. * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  9603. * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
  9604. * Rd.
  9605. *
  9606. * **Operations**:\n
  9607. * ~~~
  9608. * sa = imm3u[2:0];
  9609. * if (sa > 0) {
  9610. * if (`.u` form) { // SRAI8.u
  9611. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9612. * Rd.B[x] = res[7:0];
  9613. * } else { // SRAI8
  9614. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  9615. * }
  9616. * } else {
  9617. * Rd = Rs1;
  9618. * }
  9619. * for RV32: x=3...0,
  9620. * for RV64: x=7...0
  9621. * ~~~
  9622. *
  9623. * \param [in] a unsigned long type of value stored in a
  9624. * \param [in] b unsigned int type of value stored in b
  9625. * \return value stored in unsigned long type
  9626. */
  /*
   * Implementation notes for __RV_SRAI8:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm3u operand of the instruction).
   *  - The value of the statement expression is the register written by srai8.
   */
  #define __RV_SRAI8(a, b) \
      ({ \
          unsigned long __rv_srai8_rs1 = (unsigned long)(a); \
          unsigned long __rv_srai8_rd; \
          __ASM volatile("srai8 %0, %1, %2" \
                         : "=r"(__rv_srai8_rd) \
                         : "r"(__rv_srai8_rs1), "K"(b)); \
          __rv_srai8_rd; \
      })
  9634. /* ===== Inline Function End for 3.127.1. SRAI8 ===== */
  9635. /* ===== Inline Function Start for 3.127.2. SRAI8.u ===== */
  9636. /**
  9637. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9638. * \brief SRAI8.u (SIMD 8-bit Rounding Shift Right Arithmetic Immediate)
  9639. * \details
  9640. * **Type**: SIMD
  9641. *
  9642. * **Syntax**:\n
  9643. * ~~~
  9644. * SRAI8 Rd, Rs1, imm3u
  9645. * SRAI8.u Rd, Rs1, imm3u
  9646. * ~~~
  9647. *
  9648. * **Purpose**:\n
  9649. * Do 8-bit element arithmetic right shift operations simultaneously. The shift amount is an
  9650. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  9651. *
  9652. * **Description**:\n
  9653. * The 8-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9654. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the imm3u
  9655. * constant. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  9656. * discarded bit of each 8-bit data element to calculate the final results. And the results are written to
  9657. * Rd.
  9658. *
  9659. * **Operations**:\n
  9660. * ~~~
  9661. * sa = imm3u[2:0];
  9662. * if (sa > 0) {
  9663. * if (`.u` form) { // SRAI8.u
  9664. * res[7:-1] = SE9(Rs1.B[x][7:sa-1]) + 1;
  9665. * Rd.B[x] = res[7:0];
  9666. * } else { // SRAI8
  9667. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  9668. * }
  9669. * } else {
  9670. * Rd = Rs1;
  9671. * }
  9672. * for RV32: x=3...0,
  9673. * for RV64: x=7...0
  9674. * ~~~
  9675. *
  9676. * \param [in] a unsigned long type of value stored in a
  9677. * \param [in] b unsigned int type of value stored in b
  9678. * \return value stored in unsigned long type
  9679. */
  /*
   * Implementation notes for __RV_SRAI8_U:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm3u operand of the instruction).
   *  - The value of the statement expression is the register written by
   *    srai8.u.
   */
  #define __RV_SRAI8_U(a, b) \
      ({ \
          unsigned long __rv_srai8u_rs1 = (unsigned long)(a); \
          unsigned long __rv_srai8u_rd; \
          __ASM volatile("srai8.u %0, %1, %2" \
                         : "=r"(__rv_srai8u_rd) \
                         : "r"(__rv_srai8u_rs1), "K"(b)); \
          __rv_srai8u_rd; \
      })
  9687. /* ===== Inline Function End for 3.127.2. SRAI8.u ===== */
  9688. /* ===== Inline Function Start for 3.128.1. SRA16 ===== */
  9689. /**
  9690. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9691. * \brief SRA16 (SIMD 16-bit Shift Right Arithmetic)
  9692. * \details
  9693. * **Type**: SIMD
  9694. *
  9695. * **Syntax**:\n
  9696. * ~~~
  9697. * SRA16 Rd, Rs1, Rs2
  9698. * SRA16.u Rd, Rs1, Rs2
  9699. * ~~~
  9700. *
  9701. * **Purpose**:\n
  9702. * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9703. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9704. * results.
  9705. *
  9706. * **Description**:\n
  9707. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9708. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9709. * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9710. * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
  9711. * And the results are written to Rd.
  9712. *
  9713. * **Operations**:\n
  9714. * ~~~
  9715. * sa = Rs2[3:0];
  9716. * if (sa != 0) {
  9717. * if (`.u` form) { // SRA16.u
  9718. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9719. * Rd.H[x] = res[15:0];
  9720. * } else { // SRA16
  9721. * Rd.H[x] = SE16(Rs1.H[x][15:sa])
  9722. * }
  9723. * } else {
  9724. * Rd = Rs1;
  9725. * }
  9726. * for RV32: x=1...0,
  9727. * for RV64: x=3...0
  9728. * ~~~
  9729. *
  9730. * \param [in] a unsigned long type of value stored in a
  9731. * \param [in] b unsigned long type of value stored in b
  9732. * \return value stored in unsigned long type
  9733. */
  9734. __STATIC_FORCEINLINE unsigned long __RV_SRA16(unsigned long a, unsigned long b)
  9735. {
  9736. unsigned long result;
  9737. __ASM volatile("sra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9738. return result;
  9739. }
  9740. /* ===== Inline Function End for 3.128.1. SRA16 ===== */
  9741. /* ===== Inline Function Start for 3.128.2. SRA16.u ===== */
  9742. /**
  9743. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9744. * \brief SRA16.u (SIMD 16-bit Rounding Shift Right Arithmetic)
  9745. * \details
  9746. * **Type**: SIMD
  9747. *
  9748. * **Syntax**:\n
  9749. * ~~~
  9750. * SRA16 Rd, Rs1, Rs2
  9751. * SRA16.u Rd, Rs1, Rs2
  9752. * ~~~
  9753. *
  9754. * **Purpose**:\n
  9755. * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a
  9756. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9757. * results.
  9758. *
  9759. * **Description**:\n
  9760. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9761. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  9762. * 4-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  9763. * added to the most significant discarded bit of each 16-bit data element to calculate the final results.
  9764. * And the results are written to Rd.
  9765. *
  9766. * **Operations**:\n
  9767. * ~~~
  9768. * sa = Rs2[3:0];
  9769. * if (sa != 0) {
  9770. * if (`.u` form) { // SRA16.u
  9771. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9772. * Rd.H[x] = res[15:0];
  9773. * } else { // SRA16
  9774. * Rd.H[x] = SE16(Rs1.H[x][15:sa])
  9775. * }
  9776. * } else {
  9777. * Rd = Rs1;
  9778. * }
  9779. * for RV32: x=1...0,
  9780. * for RV64: x=3...0
  9781. * ~~~
  9782. *
  9783. * \param [in] a unsigned long type of value stored in a
  9784. * \param [in] b unsigned long type of value stored in b
  9785. * \return value stored in unsigned long type
  9786. */
  9787. __STATIC_FORCEINLINE unsigned long __RV_SRA16_U(unsigned long a, unsigned long b)
  9788. {
  9789. unsigned long result;
  9790. __ASM volatile("sra16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9791. return result;
  9792. }
  9793. /* ===== Inline Function End for 3.128.2. SRA16.u ===== */
  9794. /* ===== Inline Function Start for 3.129.1. SRAI16 ===== */
  9795. /**
  9796. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9797. * \brief SRAI16 (SIMD 16-bit Shift Right Arithmetic Immediate)
  9798. * \details
  9799. * **Type**: SIMD
  9800. *
  9801. * **Syntax**:\n
  9802. * ~~~
  9803. * SRAI16 Rd, Rs1, imm4u
  9804. * SRAI16.u Rd, Rs1, imm4u
  9805. * ~~~
  9806. *
  9807. * **Purpose**:\n
  9808. * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
  9809. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  9810. * results.
  9811. *
  9812. * **Description**:\n
  9813. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9814. * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
  9815. * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  9816. * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
  9817. * to Rd.
  9818. *
  9819. * **Operations**:\n
  9820. * ~~~
  9821. * sa = imm4u[3:0];
  9822. * if (sa > 0) {
  9823. * if (`.u` form) { // SRAI16.u
  9824. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9825. * Rd.H[x] = res[15:0];
  9826. * } else { // SRAI16
  9827. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  9828. * }
  9829. * } else {
  9830. * Rd = Rs1;
  9831. * }
  9832. * for RV32: x=1...0,
  9833. * for RV64: x=3...0
  9834. * ~~~
  9835. *
  9836. * \param [in] a unsigned long type of value stored in a
  9837. * \param [in] b unsigned long type of value stored in b
  9838. * \return value stored in unsigned long type
  9839. */
  /*
   * Implementation notes for __RV_SRAI16:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm4u operand of the instruction).
   *  - The value of the statement expression is the register written by
   *    srai16.
   */
  #define __RV_SRAI16(a, b) \
      ({ \
          unsigned long __rv_srai16_rs1 = (unsigned long)(a); \
          unsigned long __rv_srai16_rd; \
          __ASM volatile("srai16 %0, %1, %2" \
                         : "=r"(__rv_srai16_rd) \
                         : "r"(__rv_srai16_rs1), "K"(b)); \
          __rv_srai16_rd; \
      })
  9847. /* ===== Inline Function End for 3.129.1. SRAI16 ===== */
  9848. /* ===== Inline Function Start for 3.129.2. SRAI16.u ===== */
  9849. /**
  9850. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  9851. * \brief SRAI16.u (SIMD 16-bit Rounding Shift Right Arithmetic Immediate)
  9852. * \details
  9853. * **Type**: SIMD
  9854. *
  9855. * **Syntax**:\n
  9856. * ~~~
  9857. * SRAI16 Rd, Rs1, imm4u
  9858. * SRAI16.u Rd, Rs1, imm4u
  9859. * ~~~
  9860. *
  9861. * **Purpose**:\n
  9862. * Do 16-bit elements arithmetic right shift operations simultaneously. The shift amount is
  9863. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  9864. * results.
  9865. *
  9866. * **Description**:\n
  9867. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  9868. * bits are filled with the sign-bit of the 16-bit data elements. The shift amount is specified by the
  9869. * imm4u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  9870. * significant discarded bit of each 16-bit data to calculate the final results. And the results are written
  9871. * to Rd.
  9872. *
  9873. * **Operations**:\n
  9874. * ~~~
  9875. * sa = imm4u[3:0];
  9876. * if (sa > 0) {
  9877. * if (`.u` form) { // SRAI16.u
  9878. * res[15:-1] = SE17(Rs1.H[x][15:sa-1]) + 1;
  9879. * Rd.H[x] = res[15:0];
  9880. * } else { // SRAI16
  9881. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  9882. * }
  9883. * } else {
  9884. * Rd = Rs1;
  9885. * }
  9886. * for RV32: x=1...0,
  9887. * for RV64: x=3...0
  9888. * ~~~
  9889. *
  9890. * \param [in] a unsigned long type of value stored in a
  9891. * \param [in] b unsigned long type of value stored in b
  9892. * \return value stored in unsigned long type
  9893. */
  /*
   * Implementation notes for __RV_SRAI16_U:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm4u operand of the instruction).
   *  - The value of the statement expression is the register written by
   *    srai16.u.
   */
  #define __RV_SRAI16_U(a, b) \
      ({ \
          unsigned long __rv_srai16u_rs1 = (unsigned long)(a); \
          unsigned long __rv_srai16u_rd; \
          __ASM volatile("srai16.u %0, %1, %2" \
                         : "=r"(__rv_srai16u_rd) \
                         : "r"(__rv_srai16u_rs1), "K"(b)); \
          __rv_srai16u_rd; \
      })
  9901. /* ===== Inline Function End for 3.129.2. SRAI16.u ===== */
  9902. /* ===== Inline Function Start for 3.130.1. SRL8 ===== */
  9903. /**
  9904. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9905. * \brief SRL8 (SIMD 8-bit Shift Right Logical)
  9906. * \details
  9907. * **Type**: SIMD
  9908. *
  9909. * **Syntax**:\n
  9910. * ~~~
  9911. * SRL8 Rd, Rs1, Rs2
  9912. * SRL8.u Rd, Rs1, Rs2
  9913. * ~~~
  9914. *
  9915. * **Purpose**:\n
  9916. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
  9917. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9918. * results.
  9919. *
  9920. * **Description**:\n
  9921. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  9922. * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
  9923. * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
  9924. * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
  9925. *
  9926. * **Operations**:\n
  9927. * ~~~
  9928. * sa = Rs2[2:0];
  9929. * if (sa > 0) {
  9930. * if (`.u` form) { // SRL8.u
  9931. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  9932. * Rd.B[x] = res[8:1];
  9933. * } else { // SRL8
  9934. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  9935. * }
  9936. * } else {
  9937. * Rd = Rs1;
  9938. * }
  9939. * for RV32: x=3...0,
  9940. * for RV64: x=7...0
  9941. * ~~~
  9942. *
  9943. * \param [in] a unsigned long type of value stored in a
  9944. * \param [in] b unsigned int type of value stored in b
  9945. * \return value stored in unsigned long type
  9946. */
  9947. __STATIC_FORCEINLINE unsigned long __RV_SRL8(unsigned long a, unsigned int b)
  9948. {
  9949. unsigned long result;
  9950. __ASM volatile("srl8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  9951. return result;
  9952. }
  9953. /* ===== Inline Function End for 3.130.1. SRL8 ===== */
  9954. /* ===== Inline Function Start for 3.130.2. SRL8.u ===== */
  9955. /**
  9956. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  9957. * \brief SRL8.u (SIMD 8-bit Rounding Shift Right Logical)
  9958. * \details
  9959. * **Type**: SIMD
  9960. *
  9961. * **Syntax**:\n
  9962. * ~~~
  9963. * SRL8 Rd, Rs1, Rs2
  9964. * SRL8.u Rd, Rs1, Rs2
  9965. * ~~~
  9966. *
  9967. * **Purpose**:\n
  9968. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is a
  9969. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  9970. * results.
  9971. *
  9972. * **Description**:\n
  9973. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  9974. * filled with zero. The shift amount is specified by the low-order 3-bits of the value in the Rs2 register.
  9975. * For the rounding operation of the `.u` form, a value of 1 is added to the most significant discarded
  9976. * bit of each 8-bit data element to calculate the final results. And the results are written to Rd.
  9977. *
  9978. * **Operations**:\n
  9979. * ~~~
  9980. * sa = Rs2[2:0];
  9981. * if (sa > 0) {
  9982. * if (`.u` form) { // SRL8.u
  9983. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  9984. * Rd.B[x] = res[8:1];
  9985. * } else { // SRL8
  9986. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  9987. * }
  9988. * } else {
  9989. * Rd = Rs1;
  9990. * }
  9991. * for RV32: x=3...0,
  9992. * for RV64: x=7...0
  9993. * ~~~
  9994. *
  9995. * \param [in] a unsigned long type of value stored in a
  9996. * \param [in] b unsigned int type of value stored in b
  9997. * \return value stored in unsigned long type
  9998. */
  9999. __STATIC_FORCEINLINE unsigned long __RV_SRL8_U(unsigned long a, unsigned int b)
  10000. {
  10001. unsigned long result;
  10002. __ASM volatile("srl8.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10003. return result;
  10004. }
  10005. /* ===== Inline Function End for 3.130.2. SRL8.u ===== */
  10006. /* ===== Inline Function Start for 3.131.1. SRLI8 ===== */
  10007. /**
  10008. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  10009. * \brief SRLI8 (SIMD 8-bit Shift Right Logical Immediate)
  10010. * \details
  10011. * **Type**: SIMD
  10012. *
  10013. * **Syntax**:\n
  10014. * ~~~
  10015. * SRLI8 Rd, Rs1, imm3u
  10016. * SRLI8.u Rd, Rs1, imm3u
  10017. * ~~~
  10018. *
  10019. * **Purpose**:\n
  10020. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
  10021. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10022. *
  10023. * **Description**:\n
  10024. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  10025. * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
  10026. * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
  10027. * calculate the final results. And the results are written to Rd.
  10028. *
  10029. * **Operations**:\n
  10030. * ~~~
  10031. * sa = imm3u[2:0];
  10032. * if (sa > 0) {
  10033. * if (`.u` form) { // SRLI8.u
  10034. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  10035. * Rd.B[x] = res[8:1];
  10036. * } else { // SRLI8
  10037. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  10038. * }
  10039. * } else {
  10040. * Rd = Rs1;
  10041. * }
  10042. * for RV32: x=3...0,
  10043. * for RV64: x=7...0
  10044. * ~~~
  10045. *
  10046. * \param [in] a unsigned long type of value stored in a
  10047. * \param [in] b unsigned int type of value stored in b
  10048. * \return value stored in unsigned long type
  10049. */
  /*
   * Implementation notes for __RV_SRLI8:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm3u operand of the instruction).
   *  - The value of the statement expression is the register written by srli8.
   */
  #define __RV_SRLI8(a, b) \
      ({ \
          unsigned long __rv_srli8_rs1 = (unsigned long)(a); \
          unsigned long __rv_srli8_rd; \
          __ASM volatile("srli8 %0, %1, %2" \
                         : "=r"(__rv_srli8_rd) \
                         : "r"(__rv_srli8_rs1), "K"(b)); \
          __rv_srli8_rd; \
      })
  10057. /* ===== Inline Function End for 3.131.1. SRLI8 ===== */
  10058. /* ===== Inline Function Start for 3.131.2. SRLI8.u ===== */
  10059. /**
  10060. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_SHIFT
  10061. * \brief SRLI8.u (SIMD 8-bit Rounding Shift Right Logical Immediate)
  10062. * \details
  10063. * **Type**: SIMD
  10064. *
  10065. * **Syntax**:\n
  10066. * ~~~
  10067. * SRLI8 Rd, Rs1, imm3u
  10068. * SRLI8.u Rd, Rs1, imm3u
  10069. * ~~~
  10070. *
  10071. * **Purpose**:\n
  10072. * Do 8-bit elements logical right shift operations simultaneously. The shift amount is an
  10073. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10074. *
  10075. * **Description**:\n
  10076. * The 8-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits are
  10077. * filled with zero. The shift amount is specified by the imm3u constant. For the rounding operation of
  10078. * the `.u` form, a value of 1 is added to the most significant discarded bit of each 8-bit data element to
  10079. * calculate the final results. And the results are written to Rd.
  10080. *
  10081. * **Operations**:\n
  10082. * ~~~
  10083. * sa = imm3u[2:0];
  10084. * if (sa > 0) {
  10085. * if (`.u` form) { // SRLI8.u
  10086. * res[8:0] = ZE9(Rs1.B[x][7:sa-1]) + 1;
  10087. * Rd.B[x] = res[8:1];
  10088. * } else { // SRLI8
  10089. * Rd.B[x] = ZE8(Rs1.B[x][7:sa]);
  10090. * }
  10091. * } else {
  10092. * Rd = Rs1;
  10093. * }
  10094. * for RV32: x=3...0,
  10095. * for RV64: x=7...0
  10096. * ~~~
  10097. *
  10098. * \param [in] a unsigned long type of value stored in a
  10099. * \param [in] b unsigned int type of value stored in b
  10100. * \return value stored in unsigned long type
  10101. */
  /*
   * Implementation notes for __RV_SRLI8_U:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm3u operand of the instruction).
   *  - The value of the statement expression is the register written by
   *    srli8.u.
   */
  #define __RV_SRLI8_U(a, b) \
      ({ \
          unsigned long __rv_srli8u_rs1 = (unsigned long)(a); \
          unsigned long __rv_srli8u_rd; \
          __ASM volatile("srli8.u %0, %1, %2" \
                         : "=r"(__rv_srli8u_rd) \
                         : "r"(__rv_srli8u_rs1), "K"(b)); \
          __rv_srli8u_rd; \
      })
  10109. /* ===== Inline Function End for 3.131.2. SRLI8.u ===== */
  10110. /* ===== Inline Function Start for 3.132.1. SRL16 ===== */
  10111. /**
  10112. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10113. * \brief SRL16 (SIMD 16-bit Shift Right Logical)
  10114. * \details
  10115. * **Type**: SIMD
  10116. *
  10117. * **Syntax**:\n
  10118. * ~~~
  10119. * SRL16 Rd, Rs1, Rs2
  10120. * SRL16.u Rd, Rs1, Rs2
  10121. * ~~~
  10122. *
  10123. * **Purpose**:\n
  10124. * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
  10125. *
  10126. * **Description**:\n
  10127. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10128. * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
  10129. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  10130. * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
  10131. * Rd.
  10132. *
  10133. * **Operations**:\n
  10134. * ~~~
  10135. * sa = Rs2[3:0];
  10136. * if (sa > 0) {
  10137. * if (`.u` form) { // SRL16.u
  10138. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10139. * Rd.H[x] = res[16:1];
  10140. * } else { // SRL16
  10141. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10142. * }
  10143. * } else {
  10144. * Rd = Rs1;
  10145. * }
  10146. * for RV32: x=1...0,
  10147. * for RV64: x=3...0
  10148. * ~~~
  10149. *
  10150. * \param [in] a unsigned long type of value stored in a
  10151. * \param [in] b unsigned int type of value stored in b
  10152. * \return value stored in unsigned long type
  10153. */
  10154. __STATIC_FORCEINLINE unsigned long __RV_SRL16(unsigned long a, unsigned int b)
  10155. {
  10156. unsigned long result;
  10157. __ASM volatile("srl16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10158. return result;
  10159. }
  10160. /* ===== Inline Function End for 3.132.1. SRL16 ===== */
  10161. /* ===== Inline Function Start for 3.132.2. SRL16.u ===== */
  10162. /**
  10163. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10164. * \brief SRL16.u (SIMD 16-bit Rounding Shift Right Logical)
  10165. * \details
  10166. * **Type**: SIMD
  10167. *
  10168. * **Syntax**:\n
  10169. * ~~~
  10170. * SRL16 Rd, Rs1, Rs2
  10171. * SRL16.u Rd, Rs1, Rs2
  10172. * ~~~
  10173. *
  10174. * **Purpose**:\n
  10175. * Do 16-bit elements logical right shift operations simultaneously. The shift amount is a variable from a GPR. The `.u` form performs additional rounding up operations on the shifted results.
  10176. *
  10177. * **Description**:\n
  10178. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10179. * are filled with zero. The shift amount is specified by the low-order 4-bits of the value in the Rs2
  10180. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  10181. * discarded bit of each 16-bit data element to calculate the final results. And the results are written to
  10182. * Rd.
  10183. *
  10184. * **Operations**:\n
  10185. * ~~~
  10186. * sa = Rs2[3:0];
  10187. * if (sa > 0) {
  10188. * if (`.u` form) { // SRL16.u
  10189. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10190. * Rd.H[x] = res[16:1];
  10191. * } else { // SRL16
  10192. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10193. * }
  10194. * } else {
  10195. * Rd = Rs1;
  10196. * }
  10197. * for RV32: x=1...0,
  10198. * for RV64: x=3...0
  10199. * ~~~
  10200. *
  10201. * \param [in] a unsigned long type of value stored in a
  10202. * \param [in] b unsigned int type of value stored in b
  10203. * \return value stored in unsigned long type
  10204. */
  10205. __STATIC_FORCEINLINE unsigned long __RV_SRL16_U(unsigned long a, unsigned int b)
  10206. {
  10207. unsigned long result;
  10208. __ASM volatile("srl16.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10209. return result;
  10210. }
  10211. /* ===== Inline Function End for 3.132.2. SRL16.u ===== */
  10212. /* ===== Inline Function Start for 3.133.1. SRLI16 ===== */
  10213. /**
  10214. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10215. * \brief SRLI16 (SIMD 16-bit Shift Right Logical Immediate)
  10216. * \details
  10217. * **Type**: SIMD
  10218. *
  10219. * **Syntax**:\n
  10220. * ~~~
  10221. * SRLI16 Rd, Rs1, imm4u
  10222. * SRLI16.u Rd, Rs1, imm4u
  10223. * ~~~
  10224. *
  10225. * **Purpose**:\n
  10226. * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
  10227. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10228. *
  10229. * **Description**:\n
  10230. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10231. * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
  10232. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
  10233. * data element to calculate the final results. And the results are written to Rd.
  10234. *
  10235. * **Operations**:\n
  10236. * ~~~
  10237. * sa = imm4u;
  10238. * if (sa > 0) {
  10239. * if (`.u` form) { // SRLI16.u
  10240. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10241. * Rd.H[x] = res[16:1];
  10242. * } else { // SRLI16
  10243. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10244. * }
  10245. * } else {
  10246. * Rd = Rs1;
  10247. * }
  10248. * for RV32: x=1...0,
  10249. * for RV64: x=3...0
  10250. * ~~~
  10251. *
  10252. * \param [in] a unsigned long type of value stored in a
  10253. * \param [in] b unsigned int type of value stored in b
  10254. * \return value stored in unsigned long type
  10255. */
  /*
   * Implementation notes for __RV_SRLI16:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm4u operand of the instruction).
   *  - The value of the statement expression is the register written by
   *    srli16.
   */
  #define __RV_SRLI16(a, b) \
      ({ \
          unsigned long __rv_srli16_rs1 = (unsigned long)(a); \
          unsigned long __rv_srli16_rd; \
          __ASM volatile("srli16 %0, %1, %2" \
                         : "=r"(__rv_srli16_rd) \
                         : "r"(__rv_srli16_rs1), "K"(b)); \
          __rv_srli16_rd; \
      })
  10263. /* ===== Inline Function End for 3.133.1. SRLI16 ===== */
  10264. /* ===== Inline Function Start for 3.133.2. SRLI16.u ===== */
  10265. /**
  10266. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_SHIFT
  10267. * \brief SRLI16.u (SIMD 16-bit Rounding Shift Right Logical Immediate)
  10268. * \details
  10269. * **Type**: SIMD
  10270. *
  10271. * **Syntax**:\n
  10272. * ~~~
  10273. * SRLI16 Rd, Rs1, imm4u
  10274. * SRLI16.u Rd, Rs1, imm4u
  10275. * ~~~
  10276. *
  10277. * **Purpose**:\n
  10278. * Do 16-bit elements logical right shift operations simultaneously. The shift amount is an
  10279. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  10280. *
  10281. * **Description**:\n
  10282. * The 16-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  10283. * are filled with zero. The shift amount is specified by the imm4u constant. For the rounding
  10284. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 16-bit
  10285. * data element to calculate the final results. And the results are written to Rd.
  10286. *
  10287. * **Operations**:\n
  10288. * ~~~
  10289. * sa = imm4u;
  10290. * if (sa > 0) {
  10291. * if (`.u` form) { // SRLI16.u
  10292. * res[16:0] = ZE17(Rs1.H[x][15:sa-1]) + 1;
  10293. * Rd.H[x] = res[16:1];
  10294. * } else { // SRLI16
  10295. * Rd.H[x] = ZE16(Rs1.H[x][15:sa]);
  10296. * }
  10297. * } else {
  10298. * Rd = Rs1;
  10299. * }
  10300. * for RV32: x=1...0,
  10301. * for RV64: x=3...0
  10302. * ~~~
  10303. *
  10304. * \param [in] a unsigned long type of value stored in a
  10305. * \param [in] b unsigned int type of value stored in b
  10306. * \return value stored in unsigned long type
  10307. */
  /*
   * Implementation notes for __RV_SRLI16_U:
   *  - `a` is evaluated exactly once and BEFORE the output temporary is
   *    declared, and both temporaries carry macro-specific names. An argument
   *    expression that mentions a caller variable such as `result` or `__a`
   *    therefore still refers to the caller's object, not to a macro-local.
   *  - `b` is bound through the "K" immediate constraint, so it has to be an
   *    integer constant expression (the imm4u operand of the instruction).
   *  - The value of the statement expression is the register written by
   *    srli16.u.
   */
  #define __RV_SRLI16_U(a, b) \
      ({ \
          unsigned long __rv_srli16u_rs1 = (unsigned long)(a); \
          unsigned long __rv_srli16u_rd; \
          __ASM volatile("srli16.u %0, %1, %2" \
                         : "=r"(__rv_srli16u_rd) \
                         : "r"(__rv_srli16u_rs1), "K"(b)); \
          __rv_srli16u_rd; \
      })
  10315. /* ===== Inline Function End for 3.133.2. SRLI16.u ===== */
  10316. /* ===== Inline Function Start for 3.134. STAS16 ===== */
  10317. /**
  10318. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  10319. * \brief STAS16 (SIMD 16-bit Straight Addition & Subtraction)
  10320. * \details
  10321. * **Type**: SIMD
  10322. *
  10323. * **Syntax**:\n
  10324. * ~~~
  10325. * STAS16 Rd, Rs1, Rs2
  10326. * ~~~
  10327. *
  10328. * **Purpose**:\n
  10329. * Do 16-bit integer element addition and 16-bit integer element subtraction in a 32-bit
  10330. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  10331. *
  10332. * **Description**:\n
  10333. * This instruction adds the 16-bit integer element in [31:16] of 32-bit chunks in Rs1 with
  10334. * the 16-bit integer element in [31:16] of 32-bit chunks in Rs2, and writes the result to [31:16] of 32-bit
  10335. * chunks in Rd; at the same time, it subtracts the 16-bit integer element in [15:0] of 32-bit chunks in
  10336. * Rs2 from the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of 32-
  10337. * bit chunks in Rd.
  10338. *
  10339. * **Note**:\n
  10340. * This instruction can be used for either signed or unsigned operations.
  10341. *
  10342. * **Operations**:\n
  10343. * ~~~
  10344. * Rd.W[x][31:16] = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  10345. * Rd.W[x][15:0] = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  10346. * for RV32, x=0
  10347. * for RV64, x=1...0
  10348. * ~~~
  10349. *
  10350. * \param [in] a unsigned long type of value stored in a
  10351. * \param [in] b unsigned long type of value stored in b
  10352. * \return value stored in unsigned long type
  10353. */
  10354. __STATIC_FORCEINLINE unsigned long __RV_STAS16(unsigned long a, unsigned long b)
  10355. {
  10356. unsigned long result;
  10357. __ASM volatile("stas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  10358. return result;
  10359. }
  10360. /* ===== Inline Function End for 3.134. STAS16 ===== */
  10361. /* ===== Inline Function Start for 3.135. STSA16 ===== */
  10362. /**
  10363. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  10364. * \brief STSA16 (SIMD 16-bit Straight Subtraction & Addition)
  10365. * \details
  10366. * **Type**: SIMD
  10367. *
  10368. * **Syntax**:\n
  10369. * ~~~
  10370. * STSA16 Rd, Rs1, Rs2
  10371. * ~~~
  10372. *
  10373. * **Purpose**:\n
  10374. * Do 16-bit integer element subtraction and 16-bit integer element addition in a 32-bit
  10375. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  10376. *
  10377. * **Description**:\n
  10378. * This instruction subtracts the 16-bit integer element in [31:16] of 32-bit chunks in Rs2
  10379. * from the 16-bit integer element in [31:16] of 32-bit chunks in Rs1, and writes the result to [31:16] of
  10380. * 32-bit chunks in Rd; at the same time, it adds the 16-bit integer element in [15:0] of 32-bit chunks in
  10381. * Rs2 with the 16-bit integer element in [15:0] of 32-bit chunks in Rs1, and writes the result to [15:0] of
  10382. * 32-bit chunks in Rd.
  10383. *
  10384. * **Note**:\n
  10385. * This instruction can be used for either signed or unsigned operations.
  10386. *
  10387. * **Operations**:\n
  10388. * ~~~
  10389. * Rd.W[x][31:16] = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  10390. * Rd.W[x][15:0] = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  10391. * for RV32, x=0
  10392. * for RV64, x=1...0
  10393. * ~~~
  10394. *
  10395. * \param [in] a unsigned long type of value stored in a
  10396. * \param [in] b unsigned long type of value stored in b
  10397. * \return value stored in unsigned long type
  10398. */
  10399. __STATIC_FORCEINLINE unsigned long __RV_STSA16(unsigned long a, unsigned long b)
  10400. {
  10401.     unsigned long rd; /* output operand %0 (Rd) of STSA16 */
  10402.     __ASM volatile("stsa16 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  10403.     return rd;
  10404. }
  10405. /* ===== Inline Function End for 3.135. STSA16 ===== */
  10406. /* ===== Inline Function Start for 3.136. SUB8 ===== */
  10407. /**
  10408. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  10409. * \brief SUB8 (SIMD 8-bit Subtraction)
  10410. * \details
  10411. * **Type**: SIMD
  10412. *
  10413. * **Syntax**:\n
  10414. * ~~~
  10415. * SUB8 Rd, Rs1, Rs2
  10416. * ~~~
  10417. *
  10418. * **Purpose**:\n
  10419. * Do 8-bit integer element subtractions simultaneously.
  10420. *
  10421. * **Description**:\n
  10422. * This instruction subtracts the 8-bit integer elements in Rs2 from the 8-bit integer
  10423. * elements in Rs1, and then writes the result to Rd.
  10424. *
  10425. * **Note**:\n
  10426. * This instruction can be used for either signed or unsigned subtraction.
  10427. *
  10428. * **Operations**:\n
  10429. * ~~~
  10430. * Rd.B[x] = Rs1.B[x] - Rs2.B[x];
  10431. * for RV32: x=3...0,
  10432. * for RV64: x=7...0
  10433. * ~~~
  10434. *
  10435. * \param [in] a unsigned long type of value stored in a
  10436. * \param [in] b unsigned long type of value stored in b
  10437. * \return value stored in unsigned long type
  10438. */
  10439. __STATIC_FORCEINLINE unsigned long __RV_SUB8(unsigned long a, unsigned long b)
  10440. {
  10441.     unsigned long rd; /* output operand %0 (Rd) of SUB8 */
  10442.     __ASM volatile("sub8 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  10443.     return rd;
  10444. }
  10445. /* ===== Inline Function End for 3.136. SUB8 ===== */
  10446. /* ===== Inline Function Start for 3.137. SUB16 ===== */
  10447. /**
  10448. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  10449. * \brief SUB16 (SIMD 16-bit Subtraction)
  10450. * \details
  10451. * **Type**: SIMD
  10452. *
  10453. * **Syntax**:\n
  10454. * ~~~
  10455. * SUB16 Rd, Rs1, Rs2
  10456. * ~~~
  10457. *
  10458. * **Purpose**:\n
  10459. * Do 16-bit integer element subtractions simultaneously.
  10460. *
  10461. * **Description**:\n
  10462. * This instruction subtracts the 16-bit integer elements in Rs2 from the 16-bit integer
  10463. * elements in Rs1, and then writes the result to Rd.
  10464. *
  10465. * **Note**:\n
  10466. * This instruction can be used for either signed or unsigned subtraction.
  10467. *
  10468. * **Operations**:\n
  10469. * ~~~
  10470. * Rd.H[x] = Rs1.H[x] - Rs2.H[x];
  10471. * for RV32: x=1...0,
  10472. * for RV64: x=3...0
  10473. * ~~~
  10474. *
  10475. * \param [in] a unsigned long type of value stored in a
  10476. * \param [in] b unsigned long type of value stored in b
  10477. * \return value stored in unsigned long type
  10478. */
  10479. __STATIC_FORCEINLINE unsigned long __RV_SUB16(unsigned long a, unsigned long b)
  10480. {
  10481.     unsigned long rd; /* output operand %0 (Rd) of SUB16 */
  10482.     __ASM volatile("sub16 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  10483.     return rd;
  10484. }
  10485. /* ===== Inline Function End for 3.137. SUB16 ===== */
  10486. /* ===== Inline Function Start for 3.138. SUB64 ===== */
  10487. /**
  10488. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  10489. * \brief SUB64 (64-bit Subtraction)
  10490. * \details
  10491. * **Type**: DSP (64-bit Profile)
  10492. *
  10493. * **Syntax**:\n
  10494. * ~~~
  10495. * SUB64 Rd, Rs1, Rs2
  10496. * ~~~
  10497. *
  10498. * **Purpose**:\n
  10499. * Perform a 64-bit signed or unsigned integer subtraction.
  10500. *
  10501. * **RV32 Description**:\n
  10502. * This instruction subtracts the 64-bit integer of an even/odd pair of registers
  10503. * specified by Rs2(4,1) from the 64-bit integer of an even/odd pair of registers specified by Rs1(4,1),
  10504. * and then writes the 64-bit result to an even/odd pair of registers specified by Rd(4,1).
  10505. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  10506. * includes register 2d and 2d+1.
  10507. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  10508. * register of the pair contains the low 32-bit of the operand.
  10509. *
  10510. * **RV64 Description**:\n
  10511. * This instruction subtracts the 64-bit integer of Rs2 from the 64-bit integer of Rs1,
  10512. * and then writes the 64-bit result to Rd.
  10513. *
  10514. * **Note**:\n
  10515. * This instruction can be used for either signed or unsigned subtraction.
  10516. *
  10517. * **Operations**:\n
  10518. * ~~~
  10519. * * RV32:
  10520. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  10521. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  10522. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  10523. * R[t_H].R[t_L] = R[a_H].R[a_L] - R[b_H].R[b_L];
  10524. * * RV64:
  10525. * Rd = Rs1 - Rs2;
  10526. * ~~~
  10527. *
  10528. * \param [in] a unsigned long long type of value stored in a
  10529. * \param [in] b unsigned long long type of value stored in b
  10530. * \return value stored in unsigned long long type
  10531. */
  10532. __STATIC_FORCEINLINE unsigned long long __RV_SUB64(unsigned long long a, unsigned long long b)
  10533. {
  10534.     unsigned long long rd; /* 64-bit output operand %0 (Rd) of SUB64 */
  10535.     __ASM volatile("sub64 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  10536.     return rd;
  10537. }
  10538. /* ===== Inline Function End for 3.138. SUB64 ===== */
  10539. /* ===== Inline Function Start for 3.139.1. SUNPKD810 ===== */
  10540. /**
  10541. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10542. * \brief SUNPKD810 (Signed Unpacking Bytes 1 & 0)
  10543. * \details
  10544. * **Type**: DSP
  10545. *
  10546. * **Syntax**:\n
  10547. * ~~~
  10548. * SUNPKD8xy Rd, Rs1
  10549. * xy = {10, 20, 30, 31, 32}
  10550. * ~~~
  10551. *
  10552. * **Purpose**:\n
  10553. * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
  10554. * of 32-bit chunks in a register.
  10555. *
  10556. * **Description**:\n
  10557. * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  10558. * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  10559. * chunks in Rd.
  10560. *
  10561. * **Operations**:\n
  10562. * ~~~
  10563. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10564. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10565. * // SUNPKD810, x=1,y=0
  10566. * // SUNPKD820, x=2,y=0
  10567. * // SUNPKD830, x=3,y=0
  10568. * // SUNPKD831, x=3,y=1
  10569. * // SUNPKD832, x=3,y=2
  10570. * for RV32: m=0,
  10571. * for RV64: m=1...0
  10572. * ~~~
  10573. *
  10574. * \param [in] a unsigned long type of value stored in a
  10575. * \return value stored in unsigned long type
  10576. */
  10577. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD810(unsigned long a)
  10578. {
  10579.     unsigned long rd; /* output operand %0 (Rd) of SUNPKD810 */
  10580.     __ASM volatile("sunpkd810 %0, %1" : "=r"(rd) : "r"(a));
  10581.     return rd;
  10582. }
  10583. /* ===== Inline Function End for 3.139.1. SUNPKD810 ===== */
  10584. /* ===== Inline Function Start for 3.139.2. SUNPKD820 ===== */
  10585. /**
  10586. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10587. * \brief SUNPKD820 (Signed Unpacking Bytes 2 & 0)
  10588. * \details
  10589. * **Type**: DSP
  10590. *
  10591. * **Syntax**:\n
  10592. * ~~~
  10593. * SUNPKD8xy Rd, Rs1
  10594. * xy = {10, 20, 30, 31, 32}
  10595. * ~~~
  10596. *
  10597. * **Purpose**:\n
  10598. * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
  10599. * of 32-bit chunks in a register.
  10600. *
  10601. * **Description**:\n
  10602. * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  10603. * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  10604. * chunks in Rd.
  10605. *
  10606. * **Operations**:\n
  10607. * ~~~
  10608. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10609. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10610. * // SUNPKD810, x=1,y=0
  10611. * // SUNPKD820, x=2,y=0
  10612. * // SUNPKD830, x=3,y=0
  10613. * // SUNPKD831, x=3,y=1
  10614. * // SUNPKD832, x=3,y=2
  10615. * for RV32: m=0,
  10616. * for RV64: m=1...0
  10617. * ~~~
  10618. *
  10619. * \param [in] a unsigned long type of value stored in a
  10620. * \return value stored in unsigned long type
  10621. */
  10622. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD820(unsigned long a)
  10623. {
  10624.     unsigned long rd; /* output operand %0 (Rd) of SUNPKD820 */
  10625.     __ASM volatile("sunpkd820 %0, %1" : "=r"(rd) : "r"(a));
  10626.     return rd;
  10627. }
  10628. /* ===== Inline Function End for 3.139.2. SUNPKD820 ===== */
  10629. /* ===== Inline Function Start for 3.139.3. SUNPKD830 ===== */
  10630. /**
  10631. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10632. * \brief SUNPKD830 (Signed Unpacking Bytes 3 & 0)
  10633. * \details
  10634. * **Type**: DSP
  10635. *
  10636. * **Syntax**:\n
  10637. * ~~~
  10638. * SUNPKD8xy Rd, Rs1
  10639. * xy = {10, 20, 30, 31, 32}
  10640. * ~~~
  10641. *
  10642. * **Purpose**:\n
  10643. * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
  10644. * of 32-bit chunks in a register.
  10645. *
  10646. * **Description**:\n
  10647. * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  10648. * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  10649. * chunks in Rd.
  10650. *
  10651. * **Operations**:\n
  10652. * ~~~
  10653. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10654. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10655. * // SUNPKD810, x=1,y=0
  10656. * // SUNPKD820, x=2,y=0
  10657. * // SUNPKD830, x=3,y=0
  10658. * // SUNPKD831, x=3,y=1
  10659. * // SUNPKD832, x=3,y=2
  10660. * for RV32: m=0,
  10661. * for RV64: m=1...0
  10662. * ~~~
  10663. *
  10664. * \param [in] a unsigned long type of value stored in a
  10665. * \return value stored in unsigned long type
  10666. */
  10667. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD830(unsigned long a)
  10668. {
  10669.     unsigned long rd; /* output operand %0 (Rd) of SUNPKD830 */
  10670.     __ASM volatile("sunpkd830 %0, %1" : "=r"(rd) : "r"(a));
  10671.     return rd;
  10672. }
  10673. /* ===== Inline Function End for 3.139.3. SUNPKD830 ===== */
  10674. /* ===== Inline Function Start for 3.139.4. SUNPKD831 ===== */
  10675. /**
  10676. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10677. * \brief SUNPKD831 (Signed Unpacking Bytes 3 & 1)
  10678. * \details
  10679. * **Type**: DSP
  10680. *
  10681. * **Syntax**:\n
  10682. * ~~~
  10683. * SUNPKD8xy Rd, Rs1
  10684. * xy = {10, 20, 30, 31, 32}
  10685. * ~~~
  10686. *
  10687. * **Purpose**:\n
  10688. * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
  10689. * of 32-bit chunks in a register.
  10690. *
  10691. * **Description**:\n
  10692. * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  10693. * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  10694. * chunks in Rd.
  10695. *
  10696. * **Operations**:\n
  10697. * ~~~
  10698. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10699. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10700. * // SUNPKD810, x=1,y=0
  10701. * // SUNPKD820, x=2,y=0
  10702. * // SUNPKD830, x=3,y=0
  10703. * // SUNPKD831, x=3,y=1
  10704. * // SUNPKD832, x=3,y=2
  10705. * for RV32: m=0,
  10706. * for RV64: m=1...0
  10707. * ~~~
  10708. *
  10709. * \param [in] a unsigned long type of value stored in a
  10710. * \return value stored in unsigned long type
  10711. */
  10712. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD831(unsigned long a)
  10713. {
  10714.     unsigned long rd; /* output operand %0 (Rd) of SUNPKD831 */
  10715.     __ASM volatile("sunpkd831 %0, %1" : "=r"(rd) : "r"(a));
  10716.     return rd;
  10717. }
  10718. /* ===== Inline Function End for 3.139.4. SUNPKD831 ===== */
  10719. /* ===== Inline Function Start for 3.139.5. SUNPKD832 ===== */
  10720. /**
  10721. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  10722. * \brief SUNPKD832 (Signed Unpacking Bytes 3 & 2)
  10723. * \details
  10724. * **Type**: DSP
  10725. *
  10726. * **Syntax**:\n
  10727. * ~~~
  10728. * SUNPKD8xy Rd, Rs1
  10729. * xy = {10, 20, 30, 31, 32}
  10730. * ~~~
  10731. *
  10732. * **Purpose**:\n
  10733. * Unpack byte *x and byte y* of 32-bit chunks in a register into two 16-bit signed halfwords
  10734. * of 32-bit chunks in a register.
  10735. *
  10736. * **Description**:\n
  10737. * For the `SUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  10738. * two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit
  10739. * chunks in Rd.
  10740. *
  10741. * **Operations**:\n
  10742. * ~~~
  10743. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[x])
  10744. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[y])
  10745. * // SUNPKD810, x=1,y=0
  10746. * // SUNPKD820, x=2,y=0
  10747. * // SUNPKD830, x=3,y=0
  10748. * // SUNPKD831, x=3,y=1
  10749. * // SUNPKD832, x=3,y=2
  10750. * for RV32: m=0,
  10751. * for RV64: m=1...0
  10752. * ~~~
  10753. *
  10754. * \param [in] a unsigned long type of value stored in a
  10755. * \return value stored in unsigned long type
  10756. */
  10757. __STATIC_FORCEINLINE unsigned long __RV_SUNPKD832(unsigned long a)
  10758. {
  10759.     unsigned long rd; /* output operand %0 (Rd) of SUNPKD832 */
  10760.     __ASM volatile("sunpkd832 %0, %1" : "=r"(rd) : "r"(a));
  10761.     return rd;
  10762. }
  10763. /* ===== Inline Function End for 3.139.5. SUNPKD832 ===== */
  10764. /* ===== Inline Function Start for 3.140. SWAP8 ===== */
  10765. /**
  10766. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  10767. * \brief SWAP8 (Swap Byte within Halfword)
  10768. * \details
  10769. * **Type**: DSP
  10770. *
  10771. * **Syntax**:\n
  10772. * ~~~
  10773. * SWAP8 Rd, Rs1
  10774. * ~~~
  10775. *
  10776. * **Purpose**:\n
  10777. * Swap the bytes within each halfword of a register.
  10778. *
  10779. * **Description**:\n
  10780. * This instruction swaps the bytes within each halfword of Rs1 and writes the result to
  10781. * Rd.
  10782. *
  10783. * **Operations**:\n
  10784. * ~~~
  10785. * Rd.H[x] = CONCAT(Rs1.H[x][7:0],Rs1.H[x][15:8]);
  10786. * for RV32: x=1...0,
  10787. * for RV64: x=3...0
  10788. * ~~~
  10789. *
  10790. * \param [in] a unsigned long type of value stored in a
  10791. * \return value stored in unsigned long type
  10792. */
  10793. __STATIC_FORCEINLINE unsigned long __RV_SWAP8(unsigned long a)
  10794. {
  10795.     unsigned long rd; /* output operand %0 (Rd) of SWAP8 */
  10796.     __ASM volatile("swap8 %0, %1" : "=r"(rd) : "r"(a));
  10797.     return rd;
  10798. }
  10799. /* ===== Inline Function End for 3.140. SWAP8 ===== */
  10800. /* ===== Inline Function Start for 3.141. SWAP16 ===== */
  10801. /**
  10802. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  10803. * \brief SWAP16 (Swap Halfword within Word)
  10804. * \details
  10805. * **Type**: DSP
  10806. *
  10807. * **Syntax**:\n
  10808. * ~~~
  10809. * SWAP16 Rd, Rs1
  10810. * ~~~
  10811. *
  10812. * **Purpose**:\n
  10813. * Swap the 16-bit halfwords within each word of a register.
  10814. *
  10815. * **Description**:\n
  10816. * This instruction swaps the 16-bit halfwords within each word of Rs1 and writes the
  10817. * result to Rd.
  10818. *
  10819. * **Operations**:\n
  10820. * ~~~
  10821. * Rd.W[x] = CONCAT(Rs1.W[x][15:0],Rs1.W[x][31:16]);
  10822. * for RV32: x=0,
  10823. * for RV64: x=1...0
  10824. * ~~~
  10825. *
  10826. * \param [in] a unsigned long type of value stored in a
  10827. * \return value stored in unsigned long type
  10828. */
  10829. __STATIC_FORCEINLINE unsigned long __RV_SWAP16(unsigned long a)
  10830. {
  10831.     unsigned long rd; /* output operand %0 (Rd) of SWAP16 */
  10832.     __ASM volatile("swap16 %0, %1" : "=r"(rd) : "r"(a));
  10833.     return rd;
  10834. }
  10835. /* ===== Inline Function End for 3.141. SWAP16 ===== */
  10836. /* ===== Inline Function Start for 3.142. UCLIP8 ===== */
  10837. /**
  10838. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  10839. * \brief UCLIP8 (SIMD 8-bit Unsigned Clip Value)
  10840. * \details
  10841. * **Type**: SIMD
  10842. *
  10843. * **Syntax**:\n
  10844. * ~~~
  10845. * UCLIP8 Rd, Rs1, imm3u
  10846. * ~~~
  10847. *
  10848. * **Purpose**:\n
  10849. * Limit the 8-bit signed elements of a register into an unsigned range simultaneously.
  10850. *
  10851. * **Description**:\n
  10852. * This instruction limits the 8-bit signed elements stored in Rs1 into an unsigned integer
  10853. * range between 2^imm3u-1 and 0, and writes the limited results to Rd. For example, if imm3u is 3, the 8-
  10854. * bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit to 1.
  10855. *
  10856. * **Operations**:\n
  10857. * ~~~
  10858. * src = Rs1.B[x];
  10859. * if (src > (2^imm3u)-1) {
  10860. * src = (2^imm3u)-1;
  10861. * OV = 1;
  10862. * } else if (src < 0) {
  10863. * src = 0;
  10864. * OV = 1;
  10865. * }
  10866. * Rd.B[x] = src;
  10867. * for RV32: x=3...0,
  10868. * for RV64: x=7...0
  10869. * ~~~
  10870. *
  10871. * \param [in] a unsigned long type of value stored in a
  10872. * \param [in] b clip width imm3u; bound with the "K" immediate constraint, so it must be a compile-time constant
  10873. * \return value stored in unsigned long type
  10874. */
  10875. #define __RV_UCLIP8(a, b) \
  10876.     ({ \
  10877.         unsigned long __rv_uclip8_src = (unsigned long)(a); /* evaluate 'a' before any other local exists, so a caller variable named like a local is not shadowed */ \
  10878.         unsigned long __rv_uclip8_dst; \
  10879.         __ASM volatile("uclip8 %0, %1, %2" : "=r"(__rv_uclip8_dst) : "r"(__rv_uclip8_src), "K"(b)); \
  10880.         __rv_uclip8_dst; \
  10881.     })
  10882. /* ===== Inline Function End for 3.142. UCLIP8 ===== */
  10883. /* ===== Inline Function Start for 3.143. UCLIP16 ===== */
  10884. /**
  10885. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  10886. * \brief UCLIP16 (SIMD 16-bit Unsigned Clip Value)
  10887. * \details
  10888. * **Type**: SIMD
  10889. *
  10890. * **Syntax**:\n
  10891. * ~~~
  10892. * UCLIP16 Rd, Rs1, imm4u
  10893. * ~~~
  10894. *
  10895. * **Purpose**:\n
  10896. * Limit the 16-bit signed elements of a register into an unsigned range simultaneously.
  10897. *
  10898. * **Description**:\n
  10899. * This instruction limits the 16-bit signed elements stored in Rs1 into an unsigned
  10900. * integer range between 2^imm4u-1 and 0, and writes the limited results to Rd. For example, if imm4u is
  10901. * 3, the 16-bit input values should be saturated between 7 and 0. If saturation is performed, set OV bit
  10902. * to 1.
  10903. *
  10904. * **Operations**:\n
  10905. * ~~~
  10906. * src = Rs1.H[x];
  10907. * if (src > (2^imm4u)-1) {
  10908. * src = (2^imm4u)-1;
  10909. * OV = 1;
  10910. * } else if (src < 0) {
  10911. * src = 0;
  10912. * OV = 1;
  10913. * }
  10914. * Rd.H[x] = src;
  10915. * for RV32: x=1...0,
  10916. * for RV64: x=3...0
  10917. * ~~~
  10918. *
  10919. * \param [in] a unsigned long type of value stored in a
  10920. * \param [in] b clip width imm4u; bound with the "K" immediate constraint, so it must be a compile-time constant
  10921. * \return value stored in unsigned long type
  10922. */
  10923. #define __RV_UCLIP16(a, b) \
  10924.     ({ \
  10925.         unsigned long __rv_uclip16_src = (unsigned long)(a); /* evaluate 'a' before any other local exists, so a caller variable named like a local is not shadowed */ \
  10926.         unsigned long __rv_uclip16_dst; \
  10927.         __ASM volatile("uclip16 %0, %1, %2" : "=r"(__rv_uclip16_dst) : "r"(__rv_uclip16_src), "K"(b)); \
  10928.         __rv_uclip16_dst; \
  10929.     })
  10930. /* ===== Inline Function End for 3.143. UCLIP16 ===== */
  10931. /* ===== Inline Function Start for 3.144. UCLIP32 ===== */
  10932. /**
  10933. * \ingroup NMSIS_Core_DSP_Intrinsic_PART_SIMD_MISC
  10934. * \brief UCLIP32 (SIMD 32-bit Unsigned Clip Value)
  10935. * \details
  10936. * **Type**: SIMD
  10937. *
  10938. * **Syntax**:\n
  10939. * ~~~
  10940. * UCLIP32 Rd, Rs1, imm5u[4:0]
  10941. * ~~~
  10942. *
  10943. * **Purpose**:\n
  10944. * Limit the 32-bit signed integer elements of a register into an unsigned range
  10945. * simultaneously.
  10946. *
  10947. * **Description**:\n
  10948. * This instruction limits the 32-bit signed integer elements stored in Rs1 into an
  10949. * unsigned integer range between 2^imm5u-1 and 0, and writes the limited results to Rd. For example, if
  10950. * imm5u is 3, the 32-bit input values should be saturated between 7 and 0. If saturation is performed,
  10951. * set OV bit to 1.
  10952. *
  10953. * **Operations**:\n
  10954. * ~~~
  10955. * src = Rs1.W[x];
  10956. * if (src > (2^imm5u)-1) {
  10957. * src = (2^imm5u)-1;
  10958. * OV = 1;
  10959. * } else if (src < 0) {
  10960. * src = 0;
  10961. * OV = 1;
  10962. * }
  10963. * Rd.W[x] = src;
  10964. * for RV32: x=0,
  10965. * for RV64: x=1...0
  10966. * ~~~
  10967. *
  10968. * \param [in] a unsigned long type of value stored in a
  10969. * \param [in] b clip width imm5u; bound with the "K" immediate constraint, so it must be a compile-time constant
  10970. * \return value stored in unsigned long type
  10971. */
  10972. #define __RV_UCLIP32(a, b) \
  10973.     ({ \
  10974.         unsigned long __rv_uclip32_src = (unsigned long)(a); /* evaluate 'a' before any other local exists, so a caller variable named like a local is not shadowed */ \
  10975.         unsigned long __rv_uclip32_dst; \
  10976.         __ASM volatile("uclip32 %0, %1, %2" : "=r"(__rv_uclip32_dst) : "r"(__rv_uclip32_src), "K"(b)); \
  10977.         __rv_uclip32_dst; \
  10978.     })
  10979. /* ===== Inline Function End for 3.144. UCLIP32 ===== */
  10980. /* ===== Inline Function Start for 3.145. UCMPLE8 ===== */
  10981. /**
  10982. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  10983. * \brief UCMPLE8 (SIMD 8-bit Unsigned Compare Less Than & Equal)
  10984. * \details
  10985. * **Type**: SIMD
  10986. *
  10987. * **Syntax**:\n
  10988. * ~~~
  10989. * UCMPLE8 Rd, Rs1, Rs2
  10990. * ~~~
  10991. *
  10992. * **Purpose**:\n
  10993. * Do 8-bit unsigned integer elements less than & equal comparisons simultaneously.
  10994. *
  10995. * **Description**:\n
  10996. * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
  10997. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
  10998. * is true, the result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to
  10999. * Rd.
  11000. *
  11001. * **Operations**:\n
  11002. * ~~~
  11003. * Rd.B[x] = (Rs1.B[x] <=u Rs2.B[x])? 0xff : 0x0;
  11004. * for RV32: x=3...0,
  11005. * for RV64: x=7...0
  11006. * ~~~
  11007. *
  11008. * \param [in] a unsigned long type of value stored in a
  11009. * \param [in] b unsigned long type of value stored in b
  11010. * \return value stored in unsigned long type
  11011. */
  11012. __STATIC_FORCEINLINE unsigned long __RV_UCMPLE8(unsigned long a, unsigned long b)
  11013. {
  11014.     unsigned long rd; /* output operand %0 (Rd) of UCMPLE8 */
  11015.     __ASM volatile("ucmple8 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  11016.     return rd;
  11017. }
  11018. /* ===== Inline Function End for 3.145. UCMPLE8 ===== */
  11019. /* ===== Inline Function Start for 3.146. UCMPLE16 ===== */
  11020. /**
  11021. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  11022. * \brief UCMPLE16 (SIMD 16-bit Unsigned Compare Less Than & Equal)
  11023. * \details
  11024. * **Type**: SIMD
  11025. *
  11026. * **Syntax**:\n
  11027. * ~~~
  11028. * UCMPLE16 Rd, Rs1, Rs2
  11029. * ~~~
  11030. *
  11031. * **Purpose**:\n
  11032. * Do 16-bit unsigned integer elements less than & equal comparisons simultaneously.
  11033. *
  11034. * **Description**:\n
  11035. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
  11036. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than or equal to the one in Rs2. If it
  11037. * is true, the result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are
  11038. * written to Rd.
  11039. *
  11040. * **Operations**:\n
  11041. * ~~~
  11042. * Rd.H[x] = (Rs1.H[x] <=u Rs2.H[x])? 0xffff : 0x0;
  11043. * for RV32: x=1...0,
  11044. * for RV64: x=3...0
  11045. * ~~~
  11046. *
  11047. * \param [in] a unsigned long type of value stored in a
  11048. * \param [in] b unsigned long type of value stored in b
  11049. * \return value stored in unsigned long type
  11050. */
  11051. __STATIC_FORCEINLINE unsigned long __RV_UCMPLE16(unsigned long a, unsigned long b)
  11052. {
  11053.     unsigned long rd; /* output operand %0 (Rd) of UCMPLE16 */
  11054.     __ASM volatile("ucmple16 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  11055.     return rd;
  11056. }
  11057. /* ===== Inline Function End for 3.146. UCMPLE16 ===== */
  11058. /* ===== Inline Function Start for 3.147. UCMPLT8 ===== */
  11059. /**
  11060. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_CMP
  11061. * \brief UCMPLT8 (SIMD 8-bit Unsigned Compare Less Than)
  11062. * \details
  11063. * **Type**: SIMD
  11064. *
  11065. * **Syntax**:\n
  11066. * ~~~
  11067. * UCMPLT8 Rd, Rs1, Rs2
  11068. * ~~~
  11069. *
  11070. * **Purpose**:\n
  11071. * Do 8-bit unsigned integer elements less than comparisons simultaneously.
  11072. *
  11073. * **Description**:\n
  11074. * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
  11075. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  11076. * result is 0xFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  11077. *
  11078. * **Operations**:\n
  11079. * ~~~
  11080. * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? 0xff : 0x0;
  11081. * for RV32: x=3...0,
  11082. * for RV64: x=7...0
  11083. * ~~~
  11084. *
  11085. * \param [in] a unsigned long type of value stored in a
  11086. * \param [in] b unsigned long type of value stored in b
  11087. * \return value stored in unsigned long type
  11088. */
  11089. __STATIC_FORCEINLINE unsigned long __RV_UCMPLT8(unsigned long a, unsigned long b)
  11090. {
  11091.     unsigned long rd; /* output operand %0 (Rd) of UCMPLT8 */
  11092.     __ASM volatile("ucmplt8 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  11093.     return rd;
  11094. }
  11095. /* ===== Inline Function End for 3.147. UCMPLT8 ===== */
  11096. /* ===== Inline Function Start for 3.148. UCMPLT16 ===== */
  11097. /**
  11098. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_CMP
  11099. * \brief UCMPLT16 (SIMD 16-bit Unsigned Compare Less Than)
  11100. * \details
  11101. * **Type**: SIMD
  11102. *
  11103. * **Syntax**:\n
  11104. * ~~~
  11105. * UCMPLT16 Rd, Rs1, Rs2
  11106. * ~~~
  11107. *
  11108. * **Purpose**:\n
  11109. * Do 16-bit unsigned integer elements less than comparisons simultaneously.
  11110. *
  11111. * **Description**:\n
  11112. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
  11113. * unsigned integer elements in Rs2 to see if the one in Rs1 is less than the one in Rs2. If it is true, the
  11114. * result is 0xFFFF; otherwise, the result is 0x0. The element comparison results are written to Rd.
  11115. *
  11116. * **Operations**:\n
  11117. * ~~~
  11118. * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? 0xffff : 0x0;
  11119. * for RV32: x=1...0,
  11120. * for RV64: x=3...0
  11121. * ~~~
  11122. *
  11123. * \param [in] a unsigned long type of value stored in a
  11124. * \param [in] b unsigned long type of value stored in b
  11125. * \return value stored in unsigned long type
  11126. */
  11127. __STATIC_FORCEINLINE unsigned long __RV_UCMPLT16(unsigned long a, unsigned long b)
  11128. {
  11129.     unsigned long rd; /* output operand %0 (Rd) of UCMPLT16 */
  11130.     __ASM volatile("ucmplt16 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  11131.     return rd;
  11132. }
  11133. /* ===== Inline Function End for 3.148. UCMPLT16 ===== */
  11134. /* ===== Inline Function Start for 3.149. UKADD8 ===== */
  11135. /**
  11136. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  11137. * \brief UKADD8 (SIMD 8-bit Unsigned Saturating Addition)
  11138. * \details
  11139. * **Type**: SIMD
  11140. *
  11141. * **Syntax**:\n
  11142. * ~~~
  11143. * UKADD8 Rd, Rs1, Rs2
  11144. * ~~~
  11145. *
  11146. * **Purpose**:\n
  11147. * Do 8-bit unsigned integer element saturating additions simultaneously.
  11148. *
  11149. * **Description**:\n
  11150. * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
  11151. * unsigned integer elements in Rs2. If any of the results are beyond the 8-bit unsigned number range
  11152. * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  11153. * written to Rd.
  11154. *
  11155. * **Operations**:\n
  11156. * ~~~
  11157. * res[x] = Rs1.B[x] + Rs2.B[x];
  11158. * if (res[x] > (2^8)-1) {
  11159. * res[x] = (2^8)-1;
  11160. * OV = 1;
  11161. * }
  11162. * Rd.B[x] = res[x];
  11163. * for RV32: x=3...0,
  11164. * for RV64: x=7...0
  11165. * ~~~
  11166. *
  11167. * \param [in] a unsigned long type of value stored in a
  11168. * \param [in] b unsigned long type of value stored in b
  11169. * \return value stored in unsigned long type
  11170. */
  11171. __STATIC_FORCEINLINE unsigned long __RV_UKADD8(unsigned long a, unsigned long b)
  11172. {
  11173.     unsigned long rd; /* output operand %0 (Rd) of UKADD8 */
  11174.     __ASM volatile("ukadd8 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  11175.     return rd;
  11176. }
  11177. /* ===== Inline Function End for 3.149. UKADD8 ===== */
  11178. /* ===== Inline Function Start for 3.150. UKADD16 ===== */
  11179. /**
  11180. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11181. * \brief UKADD16 (SIMD 16-bit Unsigned Saturating Addition)
  11182. * \details
  11183. * **Type**: SIMD
  11184. *
  11185. * **Syntax**:\n
  11186. * ~~~
  11187. * UKADD16 Rd, Rs1, Rs2
  11188. * ~~~
  11189. *
  11190. * **Purpose**:\n
  11191. * Do 16-bit unsigned integer element saturating additions simultaneously.
  11192. *
  11193. * **Description**:\n
  11194. * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
  11195. * unsigned integer elements in Rs2. If any of the results are beyond the 16-bit unsigned number
  11196. * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
  11197. * results are written to Rd.
  11198. *
  11199. * **Operations**:\n
  11200. * ~~~
  11201. * res[x] = Rs1.H[x] + Rs2.H[x];
  11202. * if (res[x] > (2^16)-1) {
  11203. * res[x] = (2^16)-1;
  11204. * OV = 1;
  11205. * }
  11206. * Rd.H[x] = res[x];
  11207. * for RV32: x=1...0,
  11208. * for RV64: x=3...0
  11209. * ~~~
  11210. *
  11211. * \param [in] a unsigned long type of value stored in a
  11212. * \param [in] b unsigned long type of value stored in b
  11213. * \return value stored in unsigned long type
  11214. */
  11215. __STATIC_FORCEINLINE unsigned long __RV_UKADD16(unsigned long a, unsigned long b)
  11216. {
  11217.     unsigned long rd; /* output operand %0 (Rd) of UKADD16 */
  11218.     __ASM volatile("ukadd16 %0, %1, %2" : "=r"(rd) : "r"(a), "r"(b));
  11219.     return rd;
  11220. }
  11221. /* ===== Inline Function End for 3.150. UKADD16 ===== */
  11222. /* ===== Inline Function Start for 3.151. UKADD64 ===== */
  11223. /**
  11224. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  11225. * \brief UKADD64 (64-bit Unsigned Saturating Addition)
  11226. * \details
  11227. * **Type**: DSP (64-bit Profile)
  11228. *
  11229. * **Syntax**:\n
  11230. * ~~~
  11231. * UKADD64 Rd, Rs1, Rs2
  11232. * ~~~
  11233. *
  11234. * **Purpose**:\n
  11235. * Add two 64-bit unsigned integers. The result is saturated to the U64 range.
  11236. *
  11237. * **RV32 Description**:\n
  11238. * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
  11239. * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
  11240. * Rs2(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
  11241. * range and the OV bit is set to 1. The saturated result is written to an even/odd pair of registers
  11242. * specified by Rd(4,1).
  11243. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11244. * includes register 2d and 2d+1.
  11245. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11246. * of the pair contains the low 32-bit of the result.
  11247. *
  11248. * **RV64 Description**:\n
  11249. * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
  11250. * integer in Rs2. If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to
  11251. * the range and the OV bit is set to 1. The saturated result is written to Rd.
  11252. *
  11253. * **Operations**:\n
  11254. * ~~~
  11255. * * RV32:
  11256. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11257. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  11258. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  11259. * result = R[a_H].R[a_L] + R[b_H].R[b_L];
  11260. * if (result > (2^64)-1) {
  11261. * result = (2^64)-1; OV = 1;
  11262. * }
  11263. * R[t_H].R[t_L] = result;
  11264. * * RV64:
  11265. * result = Rs1 + Rs2;
  11266. * if (result > (2^64)-1) {
  11267. * result = (2^64)-1; OV = 1;
  11268. * }
  11269. * Rd = result;
  11270. * ~~~
  11271. *
  11272. * \param [in] a unsigned long long type of value stored in a
  11273. * \param [in] b unsigned long long type of value stored in b
  11274. * \return value stored in unsigned long long type
  11275. */
  11276. __STATIC_FORCEINLINE unsigned long long __RV_UKADD64(unsigned long long a, unsigned long long b)
  11277. {
  11278. unsigned long long result;
  11279. __ASM volatile("ukadd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11280. return result;
  11281. }
  11282. /* ===== Inline Function End for 3.151. UKADD64 ===== */
  11283. /* ===== Inline Function Start for 3.152. UKADDH ===== */
  11284. /**
  11285. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  11286. * \brief UKADDH (Unsigned Addition with U16 Saturation)
  11287. * \details
  11288. * **Type**: DSP
  11289. *
  11290. * **Syntax**:\n
  11291. * ~~~
  11292. * UKADDH Rd, Rs1, Rs2
  11293. * ~~~
  11294. *
  11295. * **Purpose**:\n
  11296. * Add the unsigned lower 32-bit content of two registers with U16 saturation.
  11297. *
  11298. * **Description**:\n
  11299. * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
  11300. * content of Rs2. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
  11301. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11302. *
  11303. * **Operations**:\n
  11304. * ~~~
  11305. * tmp = Rs1.W[0] + Rs2.W[0];
  11306. * if (tmp > (2^16)-1) {
  11307. * tmp = (2^16)-1;
  11308. * OV = 1;
  11309. * }
  11310. * Rd = SE(tmp[15:0]);
  11311. * ~~~
  11312. *
  11313. * \param [in] a unsigned int type of value stored in a
  11314. * \param [in] b unsigned int type of value stored in b
  11315. * \return value stored in unsigned long type
  11316. */
  11317. __STATIC_FORCEINLINE unsigned long __RV_UKADDH(unsigned int a, unsigned int b)
  11318. {
  11319. unsigned long result;
  11320. __ASM volatile("ukaddh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11321. return result;
  11322. }
  11323. /* ===== Inline Function End for 3.152. UKADDH ===== */
  11324. /* ===== Inline Function Start for 3.153. UKADDW ===== */
  11325. /**
  11326. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  11327. * \brief UKADDW (Unsigned Addition with U32 Saturation)
  11328. * \details
  11329. * **Type**: DSP
  11330. *
  11331. * **Syntax**:\n
  11332. * ~~~
  11333. * UKADDW Rd, Rs1, Rs2
  11334. * ~~~
  11335. *
  11336. * **Purpose**:\n
  11337. * Add the unsigned lower 32-bit content of two registers with U32 saturation.
  11338. *
  11339. * **Description**:\n
  11340. * The unsigned lower 32-bit content of Rs1 is added with the unsigned lower 32-bit
  11341. * content of Rs2. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
  11342. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11343. *
  11344. * **Operations**:\n
  11345. * ~~~
  11346. * tmp = Rs1.W[0] + Rs2.W[0];
  11347. * if (tmp > (2^32)-1) {
  11348. * tmp[31:0] = (2^32)-1;
  11349. * OV = 1;
  11350. * }
  11351. * Rd = tmp[31:0]; // RV32
  11352. * Rd = SE(tmp[31:0]); // RV64
  11353. * ~~~
  11354. *
  11355. * \param [in] a unsigned int type of value stored in a
  11356. * \param [in] b unsigned int type of value stored in b
  11357. * \return value stored in unsigned long type
  11358. */
  11359. __STATIC_FORCEINLINE unsigned long __RV_UKADDW(unsigned int a, unsigned int b)
  11360. {
  11361. unsigned long result;
  11362. __ASM volatile("ukaddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11363. return result;
  11364. }
  11365. /* ===== Inline Function End for 3.153. UKADDW ===== */
  11366. /* ===== Inline Function Start for 3.154. UKCRAS16 ===== */
  11367. /**
  11368. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11369. * \brief UKCRAS16 (SIMD 16-bit Unsigned Saturating Cross Addition & Subtraction)
  11370. * \details
  11371. * **Type**: SIMD
  11372. *
  11373. * **Syntax**:\n
  11374. * ~~~
  11375. * UKCRAS16 Rd, Rs1, Rs2
  11376. * ~~~
  11377. *
  11378. * **Purpose**:\n
  11379. * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
  11380. * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed
  11381. * positions in 32-bit chunks.
  11382. *
  11383. * **Description**:\n
  11384. * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
  11385. * Rs1 with the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it
  11386. * subtracts the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit
  11387. * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
  11388. * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
  11389. * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
  11390. * chunks in Rd for subtraction.
  11391. *
  11392. * **Operations**:\n
  11393. * ~~~
  11394. * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  11395. * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  11396. * if (res1 > (2^16)-1) {
  11397. * res1 = (2^16)-1;
  11398. * OV = 1;
  11399. * }
  11400. * if (res2 < 0) {
  11401. * res2 = 0;
  11402. * OV = 1;
  11403. * }
  11404. * Rd.W[x][31:16] = res1;
  11405. * Rd.W[x][15:0] = res2;
  11406. * for RV32, x=0
  11407. * for RV64, x=1...0
  11408. * ~~~
  11409. *
  11410. * \param [in] a unsigned long type of value stored in a
  11411. * \param [in] b unsigned long type of value stored in b
  11412. * \return value stored in unsigned long type
  11413. */
  11414. __STATIC_FORCEINLINE unsigned long __RV_UKCRAS16(unsigned long a, unsigned long b)
  11415. {
  11416. unsigned long result;
  11417. __ASM volatile("ukcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11418. return result;
  11419. }
  11420. /* ===== Inline Function End for 3.154. UKCRAS16 ===== */
  11421. /* ===== Inline Function Start for 3.155. UKCRSA16 ===== */
  11422. /**
  11423. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11424. * \brief UKCRSA16 (SIMD 16-bit Unsigned Saturating Cross Subtraction & Addition)
  11425. * \details
  11426. * **Type**: SIMD
  11427. *
  11428. * **Syntax**:\n
  11429. * ~~~
  11430. * UKCRSA16 Rd, Rs1, Rs2
  11431. * ~~~
  11432. *
  11433. * **Purpose**:\n
  11434. * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
  11435. * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed
  11436. * positions in 32-bit chunks.
  11437. *
  11438. * **Description**:\n
  11439. * This instruction subtracts the 16-bit unsigned integer element in [15:0] of 32-bit
  11440. * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
  11441. * same time, it adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2 with the 16-
  11442. * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
  11443. * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
  11444. * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
  11445. * 32-bit chunks in Rd for addition.
  11446. *
  11447. * **Operations**:\n
  11448. * ~~~
  11449. * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  11450. * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  11451. * if (res1 < 0) {
  11452. * res1 = 0;
  11453. * OV = 1;
  11454. * } else if (res2 > (2^16)-1) {
  11455. * res2 = (2^16)-1;
  11456. * OV = 1;
  11457. * }
  11458. * Rd.W[x][31:16] = res1;
  11459. * Rd.W[x][15:0] = res2;
  11460. * for RV32, x=0
  11461. * for RV64, x=1...0
  11462. * ~~~
  11463. *
  11464. * \param [in] a unsigned long type of value stored in a
  11465. * \param [in] b unsigned long type of value stored in b
  11466. * \return value stored in unsigned long type
  11467. */
  11468. __STATIC_FORCEINLINE unsigned long __RV_UKCRSA16(unsigned long a, unsigned long b)
  11469. {
  11470. unsigned long result;
  11471. __ASM volatile("ukcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11472. return result;
  11473. }
  11474. /* ===== Inline Function End for 3.155. UKCRSA16 ===== */
  11475. /* ===== Inline Function Start for 3.156. UKMAR64 ===== */
  11476. /**
  11477. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  11478. * \brief UKMAR64 (Unsigned Multiply and Saturating Add to 64-Bit Data)
  11479. * \details
  11480. * **Type**: DSP (64-bit Profile)
  11481. *
  11482. * **Syntax**:\n
  11483. * ~~~
  11484. * UKMAR64 Rd, Rs1, Rs2
  11485. * ~~~
  11486. *
  11487. * **Purpose**:\n
  11488. * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
  11489. * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
  11490. * saturated to the U64 range and written back to the pair of registers (RV32) or the register (RV64).
  11491. *
  11492. * **RV32 Description**:\n
  11493. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  11494. * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
  11495. * specified by Rd(4,1) with unlimited precision. If the 64-bit addition result is beyond the U64 number
  11496. * range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is
  11497. * written back to the even/odd pair of registers specified by Rd(4,1).
  11498. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11499. * includes register 2d and 2d+1.
  11500. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11501. * of the pair contains the low 32-bit of the result.
  11502. *
  11503. * **RV64 Description**:\n
  11504. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  11505. * It adds the 64-bit multiplication results to the 64-bit unsigned data in Rd with unlimited precision. If
  11506. * the 64-bit addition result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is saturated to the
  11507. * range and the OV bit is set to 1. The saturated result is written back to Rd.
  11508. *
  11509. * **Operations**:\n
  11510. * ~~~
  11511. * * RV32:
  11512. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11513. * result = R[t_H].R[t_L] + (Rs1 u* Rs2);
  11514. * if (result > (2^64)-1) {
  11515. * result = (2^64)-1; OV = 1;
  11516. * }
  11517. * R[t_H].R[t_L] = result;
  11518. * * RV64:
  11519. * // `result` has unlimited precision
  11520. * result = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
  11521. * if (result > (2^64)-1) {
  11522. * result = (2^64)-1; OV = 1;
  11523. * }
  11524. * Rd = result;
  11525. * ~~~
  11526. *
  11527. * \param [in] t unsigned long long type of value stored in t
  11528. * \param [in] a unsigned long type of value stored in a
  11529. * \param [in] b unsigned long type of value stored in b
  11530. * \return value stored in unsigned long long type
  11531. */
  11532. __STATIC_FORCEINLINE unsigned long long __RV_UKMAR64(unsigned long long t, unsigned long a, unsigned long b)
  11533. {
  11534. __ASM volatile("ukmar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  11535. return t;
  11536. }
  11537. /* ===== Inline Function End for 3.156. UKMAR64 ===== */
  11538. /* ===== Inline Function Start for 3.157. UKMSR64 ===== */
  11539. /**
  11540. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  11541. * \brief UKMSR64 (Unsigned Multiply and Saturating Subtract from 64-Bit Data)
  11542. * \details
  11543. * **Type**: DSP (64-bit Profile)
  11544. *
  11545. * **Syntax**:\n
  11546. * ~~~
  11547. * UKMSR64 Rd, Rs1, Rs2
  11548. * ~~~
  11549. *
  11550. * **Purpose**:\n
  11551. * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
  11552. * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
  11553. * The result is saturated to the U64 range and written back to the pair of registers (RV32) or a register
  11554. * (RV64).
  11555. *
  11556. * **RV32 Description**:\n
  11557. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  11558. * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
  11559. * registers specified by Rd(4,1) with unlimited precision. If the 64-bit subtraction result is beyond the
  11560. * U64 number range (0 <= U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The
  11561. * saturated result is written back to the even/odd pair of registers specified by Rd(4,1).
  11562. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11563. * includes register 2d and 2d+1.
  11564. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11565. * of the pair contains the low 32-bit of the result.
  11566. *
  11567. * **RV64 Description**:\n
  11568. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  11569. * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd with unlimited
  11570. * precision. If the 64-bit subtraction result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
  11571. * saturated to the range and the OV bit is set to 1. The saturated result is written back to Rd.
  11572. *
  11573. * **Operations**:\n
  11574. * ~~~
  11575. * * RV32:
  11576. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11577. * result = R[t_H].R[t_L] - (Rs1 u* Rs2);
  11578. * if (result < 0) {
  11579. * result = 0; OV = 1;
  11580. * }
  11581. * R[t_H].R[t_L] = result;
  11582. * * RV64:
  11583. * // `result` has unlimited precision
  11584. * result = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
  11585. * if (result < 0) {
  11586. * result = 0; OV = 1;
  11587. * }
  11588. * Rd = result;
  11589. * ~~~
  11590. *
  11591. * \param [in] t unsigned long long type of value stored in t
  11592. * \param [in] a unsigned long type of value stored in a
  11593. * \param [in] b unsigned long type of value stored in b
  11594. * \return value stored in unsigned long long type
  11595. */
  11596. __STATIC_FORCEINLINE unsigned long long __RV_UKMSR64(unsigned long long t, unsigned long a, unsigned long b)
  11597. {
  11598. __ASM volatile("ukmsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  11599. return t;
  11600. }
  11601. /* ===== Inline Function End for 3.157. UKMSR64 ===== */
  11602. /* ===== Inline Function Start for 3.158. UKSTAS16 ===== */
  11603. /**
  11604. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11605. * \brief UKSTAS16 (SIMD 16-bit Unsigned Saturating Straight Addition & Subtraction)
  11606. * \details
  11607. * **Type**: SIMD
  11608. *
  11609. * **Syntax**:\n
  11610. * ~~~
  11611. * UKSTAS16 Rd, Rs1, Rs2
  11612. * ~~~
  11613. *
  11614. * **Purpose**:\n
  11615. * Do one 16-bit unsigned integer element saturating addition and one 16-bit unsigned
  11616. * integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from
  11617. * corresponding positions in 32-bit chunks.
  11618. *
  11619. * **Description**:\n
  11620. * This instruction adds the 16-bit unsigned integer element in [31:16] of 32-bit chunks in
  11621. * Rs1 with the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it
  11622. * subtracts the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit
  11623. * unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the 16-bit
  11624. * unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1.
  11625. * The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit
  11626. * chunks in Rd for subtraction.
  11627. *
  11628. * **Operations**:\n
  11629. * ~~~
  11630. * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  11631. * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  11632. * if (res1 > (2^16)-1) {
  11633. * res1 = (2^16)-1;
  11634. * OV = 1;
  11635. * }
  11636. * if (res2 < 0) {
  11637. * res2 = 0;
  11638. * OV = 1;
  11639. * }
  11640. * Rd.W[x][31:16] = res1;
  11641. * Rd.W[x][15:0] = res2;
  11642. * for RV32, x=0
  11643. * for RV64, x=1...0
  11644. * ~~~
  11645. *
  11646. * \param [in] a unsigned long type of value stored in a
  11647. * \param [in] b unsigned long type of value stored in b
  11648. * \return value stored in unsigned long type
  11649. */
  11650. __STATIC_FORCEINLINE unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b)
  11651. {
  11652. unsigned long result;
  11653. __ASM volatile("ukstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11654. return result;
  11655. }
  11656. /* ===== Inline Function End for 3.158. UKSTAS16 ===== */
  11657. /* ===== Inline Function Start for 3.159. UKSTSA16 ===== */
  11658. /**
  11659. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11660. * \brief UKSTSA16 (SIMD 16-bit Unsigned Saturating Straight Subtraction & Addition)
  11661. * \details
  11662. * **Type**: SIMD
  11663. *
  11664. * **Syntax**:\n
  11665. * ~~~
  11666. * UKSTSA16 Rd, Rs1, Rs2
  11667. * ~~~
  11668. *
  11669. * **Purpose**:\n
  11670. * Do one 16-bit unsigned integer element saturating subtraction and one 16-bit unsigned
  11671. * integer element saturating addition in a 32-bit chunk simultaneously. Operands are from
  11672. * corresponding positions in 32-bit chunks.
  11673. *
  11674. * **Description**:\n
  11675. * This instruction subtracts the 16-bit unsigned integer element in [31:16] of 32-bit
  11676. * chunks in Rs2 from the 16-bit unsigned integer element in [31:16] of 32-bit chunks in Rs1; at the
  11677. * same time, it adds the 16-bit unsigned integer element in [15:0] of 32-bit chunks in Rs2 with the 16-
  11678. * bit unsigned integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the
  11679. * 16-bit unsigned number range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set
  11680. * to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of
  11681. * 32-bit chunks in Rd for addition.
  11682. *
  11683. * **Operations**:\n
  11684. * ~~~
  11685. * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  11686. * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  11687. * if (res1 < 0) {
  11688. * res1 = 0;
  11689. * OV = 1;
  11690. * } else if (res2 > (2^16)-1) {
  11691. * res2 = (2^16)-1;
  11692. * OV = 1;
  11693. * }
  11694. * Rd.W[x][31:16] = res1;
  11695. * Rd.W[x][15:0] = res2;
  11696. * for RV32, x=0
  11697. * for RV64, x=1...0
  11698. * ~~~
  11699. *
  11700. * \param [in] a unsigned long type of value stored in a
  11701. * \param [in] b unsigned long type of value stored in b
  11702. * \return value stored in unsigned long type
  11703. */
  11704. __STATIC_FORCEINLINE unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b)
  11705. {
  11706. unsigned long result;
  11707. __ASM volatile("ukstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11708. return result;
  11709. }
  11710. /* ===== Inline Function End for 3.159. UKSTSA16 ===== */
  11711. /* ===== Inline Function Start for 3.160. UKSUB8 ===== */
  11712. /**
  11713. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  11714. * \brief UKSUB8 (SIMD 8-bit Unsigned Saturating Subtraction)
  11715. * \details
  11716. * **Type**: SIMD
  11717. *
  11718. * **Syntax**:\n
  11719. * ~~~
  11720. * UKSUB8 Rd, Rs1, Rs2
  11721. * ~~~
  11722. *
  11723. * **Purpose**:\n
  11724. * Do 8-bit unsigned integer elements saturating subtractions simultaneously.
  11725. *
  11726. * **Description**:\n
  11727. * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
  11728. * unsigned integer elements in Rs1. If any of the results are beyond the 8-bit unsigned number range
  11729. * (0 <= RES <= 2^8-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  11730. * written to Rd.
  11731. *
  11732. * **Operations**:\n
  11733. * ~~~
  11734. * res[x] = Rs1.B[x] - Rs2.B[x];
  11735. * if (res[x] < 0) {
  11736. * res[x] = 0;
  11737. * OV = 1;
  11738. * }
  11739. * Rd.B[x] = res[x];
  11740. * for RV32: x=3...0,
  11741. * for RV64: x=7...0
  11742. * ~~~
  11743. *
  11744. * \param [in] a unsigned long type of value stored in a
  11745. * \param [in] b unsigned long type of value stored in b
  11746. * \return value stored in unsigned long type
  11747. */
  11748. __STATIC_FORCEINLINE unsigned long __RV_UKSUB8(unsigned long a, unsigned long b)
  11749. {
  11750. unsigned long result;
  11751. __ASM volatile("uksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11752. return result;
  11753. }
  11754. /* ===== Inline Function End for 3.160. UKSUB8 ===== */
  11755. /* ===== Inline Function Start for 3.161. UKSUB16 ===== */
  11756. /**
  11757. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  11758. * \brief UKSUB16 (SIMD 16-bit Unsigned Saturating Subtraction)
  11759. * \details
  11760. * **Type**: SIMD
  11761. *
  11762. * **Syntax**:\n
  11763. * ~~~
  11764. * UKSUB16 Rd, Rs1, Rs2
  11765. * ~~~
  11766. *
  11767. * **Purpose**:\n
  11768. * Do 16-bit unsigned integer elements saturating subtractions simultaneously.
  11769. *
  11770. * **Description**:\n
  11771. * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
  11772. * unsigned integer elements in Rs1. If any of the results are beyond the 16-bit unsigned number
  11773. * range (0 <= RES <= 2^16-1), they are saturated to the range and the OV bit is set to 1. The saturated
  11774. * results are written to Rd.
  11775. *
  11776. * **Operations**:\n
  11777. * ~~~
  11778. * res[x] = Rs1.H[x] - Rs2.H[x];
  11779. * if (res[x] < 0) {
  11780. * res[x] = 0;
  11781. * OV = 1;
  11782. * }
  11783. * Rd.H[x] = res[x];
  11784. * for RV32: x=1...0,
  11785. * for RV64: x=3...0
  11786. * ~~~
  11787. *
  11788. * \param [in] a unsigned long type of value stored in a
  11789. * \param [in] b unsigned long type of value stored in b
  11790. * \return value stored in unsigned long type
  11791. */
  11792. __STATIC_FORCEINLINE unsigned long __RV_UKSUB16(unsigned long a, unsigned long b)
  11793. {
  11794. unsigned long result;
  11795. __ASM volatile("uksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11796. return result;
  11797. }
  11798. /* ===== Inline Function End for 3.161. UKSUB16 ===== */
  11799. /* ===== Inline Function Start for 3.162. UKSUB64 ===== */
  11800. /**
  11801. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  11802. * \brief UKSUB64 (64-bit Unsigned Saturating Subtraction)
  11803. * \details
  11804. * **Type**: DSP (64-bit Profile)
  11805. *
  11806. * **Syntax**:\n
  11807. * ~~~
  11808. * UKSUB64 Rd, Rs1, Rs2
  11809. * ~~~
  11810. *
  11811. * **Purpose**:\n
  11812. * Perform a 64-bit unsigned integer subtraction. The result is saturated to the U64 range.
  11813. *
  11814. * **RV32 Description**:\n
  11815. * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
  11816. * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
  11817. * specified by Rs1(4,1). If the 64-bit result is beyond the U64 number range (0 <= U64 <= 2^64-1), it is
  11818. * saturated to the range and the OV bit is set to 1. The saturated result is then written to an even/odd
  11819. * pair of registers specified by Rd(4,1).
  11820. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11821. * includes register 2d and 2d+1.
  11822. * The odd `2d+1` register of the pair contains the high 32-bit of the operand and the even `2d`
  11823. * register of the pair contains the low 32-bit of the operand.
  11824. *
  11825. * **RV64 Description**:\n
  11826. * This instruction subtracts the 64-bit unsigned integer of Rs2 from the 64-bit
  11827. * unsigned integer of Rs1. If the 64-bit result is beyond the U64 number range (0 <=
  11828. * U64 <= 2^64-1), it is saturated to the range and the OV bit is set to 1. The saturated result is then written
  11829. * to Rd.
  11830. *
  11831. * **Operations**:\n
  11832. * ~~~
  11833. * * RV32:
  11834. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11835. * a_L = CONCAT(Rs1(4,1),1'b0); a_H = CONCAT(Rs1(4,1),1'b1);
  11836. * b_L = CONCAT(Rs2(4,1),1'b0); b_H = CONCAT(Rs2(4,1),1'b1);
  11837. * result = R[a_H].R[a_L] - R[b_H].R[b_L];
  11838. * if (result < 0) {
  11839. * result = 0; OV = 1;
  11840. * }
  11841. * R[t_H].R[t_L] = result;
  11842. * * RV64:
  11843. * result = Rs1 - Rs2;
  11844. * if (result < 0) {
  11845. * result = 0; OV = 1;
  11846. * }
  11847. * Rd = result;
  11848. * ~~~
  11849. *
  11850. * \param [in] a unsigned long long type of value stored in a
  11851. * \param [in] b unsigned long long type of value stored in b
  11852. * \return value stored in unsigned long long type
  11853. */
  11854. __STATIC_FORCEINLINE unsigned long long __RV_UKSUB64(unsigned long long a, unsigned long long b)
  11855. {
  11856. unsigned long long result;
  11857. __ASM volatile("uksub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11858. return result;
  11859. }
  11860. /* ===== Inline Function End for 3.162. UKSUB64 ===== */
  11861. /* ===== Inline Function Start for 3.163. UKSUBH ===== */
  11862. /**
  11863. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q15_SAT_ALU
  11864. * \brief UKSUBH (Unsigned Subtraction with U16 Saturation)
  11865. * \details
  11866. * **Type**: DSP
  11867. *
  11868. * **Syntax**:\n
  11869. * ~~~
  11870. * UKSUBH Rd, Rs1, Rs2
  11871. * ~~~
  11872. *
  11873. * **Purpose**:\n
  11874. * Subtract the unsigned lower 32-bit content of two registers with U16 saturation.
  11875. *
  11876. * **Description**:\n
  11877. * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
  11878. * content of Rs1. And the result is saturated to the 16-bit unsigned integer range of [0, 2^16-1] and then
  11879. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11880. *
  11881. * **Operations**:\n
  11882. * ~~~
  11883. * tmp = Rs1.W[0] - Rs2.W[0];
  11884. * if (tmp > (2^16)-1) {
  11885. * tmp = (2^16)-1;
  11886. * OV = 1;
  11887. * }
  11888. * else if (tmp < 0) {
  11889. * tmp = 0;
  11890. * OV = 1;
  11891. * }
  11892. * Rd = SE(tmp[15:0]);
  11893. * ~~~
  11894. *
  11895. * \param [in] a unsigned int type of value stored in a
  11896. * \param [in] b unsigned int type of value stored in b
  11897. * \return value stored in unsigned long type
  11898. */
  11899. __STATIC_FORCEINLINE unsigned long __RV_UKSUBH(unsigned int a, unsigned int b)
  11900. {
  11901. unsigned long result;
  11902. __ASM volatile("uksubh %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11903. return result;
  11904. }
  11905. /* ===== Inline Function End for 3.163. UKSUBH ===== */
  11906. /* ===== Inline Function Start for 3.164. UKSUBW ===== */
  11907. /**
  11908. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_Q31_SAT_ALU
  11909. * \brief UKSUBW (Unsigned Subtraction with U32 Saturation)
  11910. * \details
  11911. * **Type**: DSP
  11912. *
  11913. * **Syntax**:\n
  11914. * ~~~
  11915. * UKSUBW Rd, Rs1, Rs2
  11916. * ~~~
  11917. *
  11918. * **Purpose**:\n
  11919. * Subtract the unsigned lower 32-bit content of two registers with unsigned 32-bit
  11920. * saturation.
  11921. *
  11922. * **Description**:\n
  11923. * The unsigned lower 32-bit content of Rs2 is subtracted from the unsigned lower 32-bit
  11924. * content of Rs1. And the result is saturated to the 32-bit unsigned integer range of [0, 2^32-1] and then
  11925. * sign-extended and written to Rd. If saturation happens, this instruction sets the OV flag.
  11926. *
  11927. * **Operations**:\n
  11928. * ~~~
  11929. * tmp = Rs1.W[0] - Rs2.W[0];
  11930. * if (tmp < 0) {
  11931. * tmp[31:0] = 0;
  11932. * OV = 1;
  11933. * }
  11934. * Rd = tmp[31:0]; // RV32
  11935. * Rd = SE(tmp[31:0]); // RV64
  11936. * ~~~
  11937. *
  11938. * \param [in] a unsigned int type of value stored in a
  11939. * \param [in] b unsigned int type of value stored in b
  11940. * \return value stored in unsigned long type
  11941. */
  11942. __STATIC_FORCEINLINE unsigned long __RV_UKSUBW(unsigned int a, unsigned int b)
  11943. {
  11944. unsigned long result;
  11945. __ASM volatile("uksubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  11946. return result;
  11947. }
  11948. /* ===== Inline Function End for 3.164. UKSUBW ===== */
  11949. /* ===== Inline Function Start for 3.165. UMAR64 ===== */
  11950. /**
  11951. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  11952. * \brief UMAR64 (Unsigned Multiply and Add to 64-Bit Data)
  11953. * \details
  11954. * **Type**: DSP (64-bit Profile)
  11955. *
  11956. * **Syntax**:\n
  11957. * ~~~
  11958. * UMAR64 Rd, Rs1, Rs2
  11959. * ~~~
  11960. *
  11961. * **Purpose**:\n
  11962. * Multiply the 32-bit unsigned elements in two registers and add the 64-bit multiplication
  11963. * results to the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64). The result is
  11964. * written back to the pair of registers (RV32) or a register (RV64).
  11965. *
  11966. * **RV32 Description**:\n
  11967. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  11968. * adds the 64-bit multiplication result to the 64-bit unsigned data of an even/odd pair of registers
  11969. * specified by Rd(4,1). The addition result is written back to the even/odd pair of registers specified by
  11970. * Rd(4,1).
  11971. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  11972. * includes register 2d and 2d+1.
  11973. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  11974. * of the pair contains the low 32-bit of the result.
  11975. *
  11976. * **RV64 Description**:\n
  11977. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  11978. * It adds the 64-bit multiplication results to the 64-bit unsigned data of Rd. The addition result is
  11979. * written back to Rd.
  11980. *
  11981. * **Operations**:\n
  11982. * ~~~
  11983. * * RV32:
  11984. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  11985. * R[t_H].R[t_L] = R[t_H].R[t_L] + (Rs1 * Rs2);
  11986. * * RV64:
  11987. * Rd = Rd + (Rs1.W[0] u* Rs2.W[0]) + (Rs1.W[1] u* Rs2.W[1]);
  11988. * ~~~
  11989. *
  11990. * \param [in] t unsigned long long type of value stored in t
  11991. * \param [in] a unsigned long type of value stored in a
  11992. * \param [in] b unsigned long type of value stored in b
  11993. * \return value stored in unsigned long long type
  11994. */
  11995. __STATIC_FORCEINLINE unsigned long long __RV_UMAR64(unsigned long long t, unsigned long a, unsigned long b)
  11996. {
  11997. __ASM volatile("umar64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  11998. return t;
  11999. }
  12000. /* ===== Inline Function End for 3.165. UMAR64 ===== */
  12001. /* ===== Inline Function Start for 3.166. UMAQA ===== */
  12002. /**
  12003. * \ingroup NMSIS_Core_DSP_Intrinsic_8B_MULT_32B_ADD
  12004. * \brief UMAQA (Unsigned Multiply Four Bytes with 32-bit Adds)
  12005. * \details
  12006. * **Type**: DSP
  12007. *
  12008. * **Syntax**:\n
  12009. * ~~~
  12010. * UMAQA Rd, Rs1, Rs2
  12011. * ~~~
  12012. *
  12013. * **Purpose**:\n
  12014. * Do four unsigned 8-bit multiplications from 32-bit chunks of two registers, and then add
  12015. * the four 16-bit results and the content of the corresponding 32-bit chunks of a third register together.
  12016. *
  12017. * **Description**:\n
  12018. * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
  12019. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
  12020. * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  12021. * corresponding 32-bit chunks in Rd.
  12022. *
  12023. * **Operations**:\n
  12024. * ~~~
  12025. * res[x] = Rd.W[x] + (Rs1.W[x].B[3] u* Rs2.W[x].B[3]) +
  12026. * (Rs1.W[x].B[2] u* Rs2.W[x].B[2]) + (Rs1.W[x].B[1] u* Rs2.W[x].B[1]) +
  12027. * (Rs1.W[x].B[0] u* Rs2.W[x].B[0]);
  12028. * Rd.W[x] = res[x];
  12029. * for RV32: x=0,
  12030. * for RV64: x=1...0
  12031. * ~~~
  12032. *
  12033. * \param [in] t unsigned long type of value stored in t
  12034. * \param [in] a unsigned long type of value stored in a
  12035. * \param [in] b unsigned long type of value stored in b
  12036. * \return value stored in unsigned long type
  12037. */
  12038. __STATIC_FORCEINLINE unsigned long __RV_UMAQA(unsigned long t, unsigned long a, unsigned long b)
  12039. {
  12040. __ASM volatile("umaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  12041. return t;
  12042. }
  12043. /* ===== Inline Function End for 3.166. UMAQA ===== */
  12044. /* ===== Inline Function Start for 3.167. UMAX8 ===== */
  12045. /**
  12046. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  12047. * \brief UMAX8 (SIMD 8-bit Unsigned Maximum)
  12048. * \details
  12049. * **Type**: SIMD
  12050. *
  12051. * **Syntax**:\n
  12052. * ~~~
  12053. * UMAX8 Rd, Rs1, Rs2
  12054. * ~~~
  12055. *
  12056. * **Purpose**:\n
  12057. * Do 8-bit unsigned integer elements finding maximum operations simultaneously.
  12058. *
  12059. * **Description**:\n
  12060. * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
  12061. * unsigned integer elements in Rs2 and selects, for each pair, the greater element. The
  12062. * selected results are written to Rd.
  12063. *
  12064. * **Operations**:\n
  12065. * ~~~
  12066. * Rd.B[x] = (Rs1.B[x] >u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  12067. * for RV32: x=3...0,
  12068. * for RV64: x=7...0
  12069. * ~~~
  12070. *
  12071. * \param [in] a unsigned long type of value stored in a
  12072. * \param [in] b unsigned long type of value stored in b
  12073. * \return value stored in unsigned long type
  12074. */
  12075. __STATIC_FORCEINLINE unsigned long __RV_UMAX8(unsigned long a, unsigned long b)
  12076. {
  12077. unsigned long result;
  12078. __ASM volatile("umax8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12079. return result;
  12080. }
  12081. /* ===== Inline Function End for 3.167. UMAX8 ===== */
  12082. /* ===== Inline Function Start for 3.168. UMAX16 ===== */
  12083. /**
  12084. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  12085. * \brief UMAX16 (SIMD 16-bit Unsigned Maximum)
  12086. * \details
  12087. * **Type**: SIMD
  12088. *
  12089. * **Syntax**:\n
  12090. * ~~~
  12091. * UMAX16 Rd, Rs1, Rs2
  12092. * ~~~
  12093. *
  12094. * **Purpose**:\n
  12095. * Do 16-bit unsigned integer elements finding maximum operations simultaneously.
  12096. *
  12097. * **Description**:\n
  12098. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
  12099. * unsigned integer elements in Rs2 and selects, for each pair, the greater element. The
  12100. * selected results are written to Rd.
  12101. *
  12102. * **Operations**:\n
  12103. * ~~~
  12104. * Rd.H[x] = (Rs1.H[x] >u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  12105. * for RV32: x=1...0,
  12106. * for RV64: x=3...0
  12107. * ~~~
  12108. *
  12109. * \param [in] a unsigned long type of value stored in a
  12110. * \param [in] b unsigned long type of value stored in b
  12111. * \return value stored in unsigned long type
  12112. */
  12113. __STATIC_FORCEINLINE unsigned long __RV_UMAX16(unsigned long a, unsigned long b)
  12114. {
  12115. unsigned long result;
  12116. __ASM volatile("umax16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12117. return result;
  12118. }
  12119. /* ===== Inline Function End for 3.168. UMAX16 ===== */
  12120. /* ===== Inline Function Start for 3.169. UMIN8 ===== */
  12121. /**
  12122. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MISC
  12123. * \brief UMIN8 (SIMD 8-bit Unsigned Minimum)
  12124. * \details
  12125. * **Type**: SIMD
  12126. *
  12127. * **Syntax**:\n
  12128. * ~~~
  12129. * UMIN8 Rd, Rs1, Rs2
  12130. * ~~~
  12131. *
  12132. * **Purpose**:\n
  12133. * Do 8-bit unsigned integer elements finding minimum operations simultaneously.
  12134. *
  12135. * **Description**:\n
  12136. * This instruction compares the 8-bit unsigned integer elements in Rs1 with the 8-bit
  12137. * unsigned integer elements in Rs2 and selects, for each pair, the smaller element. The
  12138. * selected results are written to Rd.
  12139. *
  12140. * **Operations**:\n
  12141. * ~~~
  12142. * Rd.B[x] = (Rs1.B[x] <u Rs2.B[x])? Rs1.B[x] : Rs2.B[x];
  12143. * for RV32: x=3...0,
  12144. * for RV64: x=7...0
  12145. * ~~~
  12146. *
  12147. * \param [in] a unsigned long type of value stored in a
  12148. * \param [in] b unsigned long type of value stored in b
  12149. * \return value stored in unsigned long type
  12150. */
  12151. __STATIC_FORCEINLINE unsigned long __RV_UMIN8(unsigned long a, unsigned long b)
  12152. {
  12153. unsigned long result;
  12154. __ASM volatile("umin8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12155. return result;
  12156. }
  12157. /* ===== Inline Function End for 3.169. UMIN8 ===== */
  12158. /* ===== Inline Function Start for 3.170. UMIN16 ===== */
  12159. /**
  12160. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MISC
  12161. * \brief UMIN16 (SIMD 16-bit Unsigned Minimum)
  12162. * \details
  12163. * **Type**: SIMD
  12164. *
  12165. * **Syntax**:\n
  12166. * ~~~
  12167. * UMIN16 Rd, Rs1, Rs2
  12168. * ~~~
  12169. *
  12170. * **Purpose**:\n
  12171. * Do 16-bit unsigned integer elements finding minimum operations simultaneously.
  12172. *
  12173. * **Description**:\n
  12174. * This instruction compares the 16-bit unsigned integer elements in Rs1 with the 16-bit
  12175. * unsigned integer elements in Rs2 and selects, for each pair, the smaller element. The
  12176. * selected results are written to Rd.
  12177. *
  12178. * **Operations**:\n
  12179. * ~~~
  12180. * Rd.H[x] = (Rs1.H[x] <u Rs2.H[x])? Rs1.H[x] : Rs2.H[x];
  12181. * for RV32: x=1...0,
  12182. * for RV64: x=3...0
  12183. * ~~~
  12184. *
  12185. * \param [in] a unsigned long type of value stored in a
  12186. * \param [in] b unsigned long type of value stored in b
  12187. * \return value stored in unsigned long type
  12188. */
  12189. __STATIC_FORCEINLINE unsigned long __RV_UMIN16(unsigned long a, unsigned long b)
  12190. {
  12191. unsigned long result;
  12192. __ASM volatile("umin16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12193. return result;
  12194. }
  12195. /* ===== Inline Function End for 3.170. UMIN16 ===== */
  12196. /* ===== Inline Function Start for 3.171. UMSR64 ===== */
  12197. /**
  12198. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_MULT_64B_ADDSUB
  12199. * \brief UMSR64 (Unsigned Multiply and Subtract from 64-Bit Data)
  12200. * \details
  12201. * **Type**: DSP (64-bit Profile)
  12202. *
  12203. * **Syntax**:\n
  12204. * ~~~
  12205. * UMSR64 Rd, Rs1, Rs2
  12206. * ~~~
  12207. *
  12208. * **Purpose**:\n
  12209. * Multiply the 32-bit unsigned elements in two registers and subtract the 64-bit
  12210. * multiplication results from the 64-bit unsigned data of a pair of registers (RV32) or a register (RV64).
  12211. * The result is written back to the pair of registers (RV32) or a register (RV64).
  12212. *
  12213. * **RV32 Description**:\n
  12214. * This instruction multiplies the 32-bit unsigned data of Rs1 with that of Rs2. It
  12215. * subtracts the 64-bit multiplication result from the 64-bit unsigned data of an even/odd pair of
  12216. * registers specified by Rd(4,1). The subtraction result is written back to the even/odd pair of registers
  12217. * specified by Rd(4,1).
  12218. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12219. * includes register 2d and 2d+1.
  12220. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  12221. * of the pair contains the low 32-bit of the result.
  12222. *
  12223. * **RV64 Description**:\n
  12224. * This instruction multiplies the 32-bit unsigned elements of Rs1 with that of Rs2.
  12225. * It subtracts the 64-bit multiplication results from the 64-bit unsigned data of Rd. The subtraction
  12226. * result is written back to Rd.
  12227. *
  12228. * **Operations**:\n
  12229. * ~~~
  12230. * * RV32:
  12231. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12232. * R[t_H].R[t_L] = R[t_H].R[t_L] - (Rs1 * Rs2);
  12233. * * RV64:
  12234. * Rd = Rd - (Rs1.W[0] u* Rs2.W[0]) - (Rs1.W[1] u* Rs2.W[1]);
  12235. * ~~~
  12236. *
  12237. * \param [in] t unsigned long long type of value stored in t
  12238. * \param [in] a unsigned long type of value stored in a
  12239. * \param [in] b unsigned long type of value stored in b
  12240. * \return value stored in unsigned long long type
  12241. */
  12242. __STATIC_FORCEINLINE unsigned long long __RV_UMSR64(unsigned long long t, unsigned long a, unsigned long b)
  12243. {
  12244. __ASM volatile("umsr64 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  12245. return t;
  12246. }
  12247. /* ===== Inline Function End for 3.171. UMSR64 ===== */
  12248. /* ===== Inline Function Start for 3.172.1. UMUL8 ===== */
  12249. /**
  12250. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  12251. * \brief UMUL8 (SIMD Unsigned 8-bit Multiply)
  12252. * \details
  12253. * **Type**: SIMD
  12254. *
  12255. * **Syntax**:\n
  12256. * ~~~
  12257. * UMUL8 Rd, Rs1, Rs2
  12258. * UMULX8 Rd, Rs1, Rs2
  12259. * ~~~
  12260. *
  12261. * **Purpose**:\n
  12262. * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
  12263. *
  12264. * **RV32 Description**:\n
  12265. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12266. * with the corresponding unsigned 8-bit data elements of Rs2.
  12267. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12268. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12269. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12270. * elements of Rs2.
  12271. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  12272. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12273. * includes register 2d and 2d+1.
  12274. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  12275. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  12276. * part of Rs1.
  12277. *
  12278. * **RV64 Description**:\n
  12279. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12280. * with the corresponding unsigned 8-bit data elements of Rs2.
  12281. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12282. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12283. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12284. * elements of Rs2.
  12285. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  12286. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  12287. * the bottom part of Rs1.
  12288. *
  12289. * **Operations**:\n
  12290. * ~~~
  12291. * * RV32:
  12292. * if (is `UMUL8`) {
  12293. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12294. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12295. * } else if (is `UMULX8`) {
  12296. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12297. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12298. * }
  12299. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12300. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12301. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12302. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  12303. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  12304. * x = 0 and 2
  12305. * * RV64:
  12306. * if (is `UMUL8`) {
  12307. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12308. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12309. * } else if (is `UMULX8`) {
  12310. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12311. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12312. * }
  12313. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12314. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12315. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12316. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  12317. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
  12318. * ~~~
  12319. *
  12320. * \param [in] a unsigned int type of value stored in a
  12321. * \param [in] b unsigned int type of value stored in b
  12322. * \return value stored in unsigned long long type
  12323. */
  12324. __STATIC_FORCEINLINE unsigned long long __RV_UMUL8(unsigned int a, unsigned int b)
  12325. {
  12326. unsigned long long result;
  12327. __ASM volatile("umul8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12328. return result;
  12329. }
  12330. /* ===== Inline Function End for 3.172.1. UMUL8 ===== */
  12331. /* ===== Inline Function Start for 3.172.2. UMULX8 ===== */
  12332. /**
  12333. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_MULTIPLY
  12334. * \brief UMULX8 (SIMD Unsigned Crossed 8-bit Multiply)
  12335. * \details
  12336. * **Type**: SIMD
  12337. *
  12338. * **Syntax**:\n
  12339. * ~~~
  12340. * UMUL8 Rd, Rs1, Rs2
  12341. * UMULX8 Rd, Rs1, Rs2
  12342. * ~~~
  12343. *
  12344. * **Purpose**:\n
  12345. * Do unsigned 8-bit multiplications and generate four 16-bit results simultaneously.
  12346. *
  12347. * **RV32 Description**:\n
  12348. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12349. * with the corresponding unsigned 8-bit data elements of Rs2.
  12350. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12351. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12352. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12353. * elements of Rs2.
  12354. * The four 16-bit results are then written into an even/odd pair of registers specified by Rd(4,1).
  12355. * Rd(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12356. * includes register 2d and 2d+1.
  12357. * The odd `2d+1` register of the pair contains the two 16-bit results calculated from the top part of
  12358. * Rs1 and the even `2d` register of the pair contains the two 16-bit results calculated from the bottom
  12359. * part of Rs1.
  12360. *
  12361. * **RV64 Description**:\n
  12362. * For the `UMUL8` instruction, multiply the unsigned 8-bit data elements of Rs1
  12363. * with the corresponding unsigned 8-bit data elements of Rs2.
  12364. * For the `UMULX8` instruction, multiply the first and second unsigned 8-bit data elements of Rs1
  12365. * with the second and first unsigned 8-bit data elements of Rs2. At the same time, multiply the third
  12366. * and fourth unsigned 8-bit data elements of Rs1 with the fourth and third unsigned 8-bit data
  12367. * elements of Rs2.
  12368. * The four 16-bit results are then written into Rd. The Rd.W[1] contains the two 16-bit results
  12369. * calculated from the top part of Rs1 and the Rd.W[0] contains the two 16-bit results calculated from
  12370. * the bottom part of Rs1.
  12371. *
  12372. * **Operations**:\n
  12373. * ~~~
  12374. * * RV32:
  12375. * if (is `UMUL8`) {
  12376. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12377. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12378. * } else if (is `UMULX8`) {
  12379. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12380. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12381. * }
  12382. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12383. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12384. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12385. * R[t_H].H[1] = rest[1]; R[t_H].H[0] = resb[1];
  12386. * R[t_L].H[1] = rest[0]; R[t_L].H[0] = resb[0];
  12387. * x = 0 and 2
  12388. * * RV64:
  12389. * if (is `UMUL8`) {
  12390. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x+1]; // top
  12391. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x]; // bottom
  12392. * } else if (is `UMULX8`) {
  12393. * op1t[x/2] = Rs1.B[x+1]; op2t[x/2] = Rs2.B[x]; // Rs1 top
  12394. * op1b[x/2] = Rs1.B[x]; op2b[x/2] = Rs2.B[x+1]; // Rs1 bottom
  12395. * }
  12396. * rest[x/2] = op1t[x/2] u* op2t[x/2];
  12397. * resb[x/2] = op1b[x/2] u* op2b[x/2];
  12398. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12399. * Rd.W[1].H[1] = rest[1]; Rd.W[1].H[0] = resb[1];
  12400. * Rd.W[0].H[1] = rest[0]; Rd.W[0].H[0] = resb[0]; x = 0 and 2
  12401. * ~~~
  12402. *
  12403. * \param [in] a unsigned int type of value stored in a
  12404. * \param [in] b unsigned int type of value stored in b
  12405. * \return value stored in unsigned long long type
  12406. */
  12407. __STATIC_FORCEINLINE unsigned long long __RV_UMULX8(unsigned int a, unsigned int b)
  12408. {
  12409. unsigned long long result;
  12410. __ASM volatile("umulx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12411. return result;
  12412. }
  12413. /* ===== Inline Function End for 3.172.2. UMULX8 ===== */
  12414. /* ===== Inline Function Start for 3.173.1. UMUL16 ===== */
  12415. /**
  12416. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  12417. * \brief UMUL16 (SIMD Unsigned 16-bit Multiply)
  12418. * \details
  12419. * **Type**: SIMD
  12420. *
  12421. * **Syntax**:\n
  12422. * ~~~
  12423. * UMUL16 Rd, Rs1, Rs2
  12424. * UMULX16 Rd, Rs1, Rs2
  12425. * ~~~
  12426. *
  12427. * **Purpose**:\n
  12428. * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
  12429. *
  12430. * **RV32 Description**:\n
  12431. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
  12432. * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
  12433. * with the bottom 16-bit U16 content of Rs2.
  12434. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
  12435. * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
  12436. * bit U16 content of Rs2.
  12437. * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  12438. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  12439. * register 2d and 2d+1.
  12440. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  12441. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  12442. *
  12443. * **RV64 Description**:\n
  12444. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
  12445. * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
  12446. * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
  12447. * content of the lower 32-bit word in Rs2.
  12448. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
  12449. * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
  12450. * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
  12451. * lower 32-bit word in Rs2.
  12452. * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
  12453. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  12454. * the lower 32-bit word in Rs1 is written to Rd.W[0]
  12455. *
  12456. * **Operations**:\n
  12457. * ~~~
  12458. * * RV32:
  12459. * if (is `UMUL16`) {
  12460. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12461. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12462. * } else if (is `UMULX16`) {
  12463. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12464. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12465. * }
  12466. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12467. * res = aop u* bop;
  12468. * }
  12469. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12470. * R[t_H] = rest;
  12471. * R[t_L] = resb;
  12472. * * RV64:
  12473. * if (is `UMUL16`) {
  12474. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12475. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12476. * } else if (is `UMULX16`) {
  12477. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12478. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12479. * }
  12480. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12481. * res = aop u* bop;
  12482. * }
  12483. * Rd.W[1] = rest;
  12484. * Rd.W[0] = resb;
  12485. * ~~~
  12486. *
  12487. * \param [in] a unsigned int type of value stored in a
  12488. * \param [in] b unsigned int type of value stored in b
  12489. * \return value stored in unsigned long long type
  12490. */
  12491. __STATIC_FORCEINLINE unsigned long long __RV_UMUL16(unsigned int a, unsigned int b)
  12492. {
  12493. unsigned long long result;
  12494. __ASM volatile("umul16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12495. return result;
  12496. }
  12497. /* ===== Inline Function End for 3.173.1. UMUL16 ===== */
  12498. /* ===== Inline Function Start for 3.173.2. UMULX16 ===== */
  12499. /**
  12500. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_MULTIPLY
  12501. * \brief UMULX16 (SIMD Unsigned Crossed 16-bit Multiply)
  12502. * \details
  12503. * **Type**: SIMD
  12504. *
  12505. * **Syntax**:\n
  12506. * ~~~
  12507. * UMUL16 Rd, Rs1, Rs2
  12508. * UMULX16 Rd, Rs1, Rs2
  12509. * ~~~
  12510. *
  12511. * **Purpose**:\n
  12512. * Do unsigned 16-bit multiplications and generate two 32-bit results simultaneously.
  12513. *
  12514. * **RV32 Description**:\n
  12515. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of Rs1 with
  12516. * the top 16-bit U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1
  12517. * with the bottom 16-bit U16 content of Rs2.
  12518. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of Rs1 with the bottom 16-bit
  12519. * U16 content of Rs2. At the same time, multiply the bottom 16-bit U16 content of Rs1 with the top 16-
  12520. * bit U16 content of Rs2.
  12521. * The two U32 results are then written into an even/odd pair of registers specified by Rd(4,1). Rd(4,1),
  12522. * i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair includes
  12523. * register 2d and 2d+1.
  12524. * The odd `2d+1` register of the pair contains the 32-bit result calculated from the top part of Rs1 and
  12525. * the even `2d` register of the pair contains the 32-bit result calculated from the bottom part of Rs1.
  12526. *
  12527. * **RV64 Description**:\n
  12528. * For the `UMUL16` instruction, multiply the top 16-bit U16 content of the lower
  12529. * 32-bit word in Rs1 with the top 16-bit U16 content of the lower 32-bit word in Rs2. At the same time,
  12530. * multiply the bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the bottom 16-bit U16
  12531. * content of the lower 32-bit word in Rs2.
  12532. * For the `UMULX16` instruction, multiply the top 16-bit U16 content of the lower 32-bit word in Rs1
  12533. * with the bottom 16-bit U16 content of the lower 32-bit word in Rs2. At the same time, multiply the
  12534. * bottom 16-bit U16 content of the lower 32-bit word in Rs1 with the top 16-bit U16 content of the
  12535. * lower 32-bit word in Rs2.
  12536. * The two 32-bit U32 results are then written into Rd. The result calculated from the top 16-bit of the
  12537. * lower 32-bit word in Rs1 is written to Rd.W[1]. And the result calculated from the bottom 16-bit of
  12538. * the lower 32-bit word in Rs1 is written to Rd.W[0]
  12539. *
  12540. * **Operations**:\n
  12541. * ~~~
  12542. * * RV32:
  12543. * if (is `UMUL16`) {
  12544. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12545. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12546. * } else if (is `UMULX16`) {
  12547. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12548. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12549. * }
  12550. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12551. * res = aop u* bop;
  12552. * }
  12553. * t_L = CONCAT(Rd(4,1),1'b0); t_H = CONCAT(Rd(4,1),1'b1);
  12554. * R[t_H] = rest;
  12555. * R[t_L] = resb;
  12556. * * RV64:
  12557. * if (is `UMUL16`) {
  12558. * op1t = Rs1.H[1]; op2t = Rs2.H[1]; // top
  12559. * op1b = Rs1.H[0]; op2b = Rs2.H[0]; // bottom
  12560. * } else if (is `UMULX16`) {
  12561. * op1t = Rs1.H[1]; op2t = Rs2.H[0]; // Rs1 top
  12562. * op1b = Rs1.H[0]; op2b = Rs2.H[1]; // Rs1 bottom
  12563. * }
  12564. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  12565. * res = aop u* bop;
  12566. * }
  12567. * Rd.W[1] = rest;
  12568. * Rd.W[0] = resb;
  12569. * ~~~
  12570. *
  12571. * \param [in] a unsigned int type of value stored in a
  12572. * \param [in] b unsigned int type of value stored in b
  12573. * \return value stored in unsigned long long type
  12574. */
  12575. __STATIC_FORCEINLINE unsigned long long __RV_UMULX16(unsigned int a, unsigned int b)
  12576. {
  12577. unsigned long long result;
  12578. __ASM volatile("umulx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12579. return result;
  12580. }
  12581. /* ===== Inline Function End for 3.173.2. UMULX16 ===== */
  12582. /* ===== Inline Function Start for 3.174. URADD8 ===== */
  12583. /**
  12584. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  12585. * \brief URADD8 (SIMD 8-bit Unsigned Halving Addition)
  12586. * \details
  12587. * **Type**: SIMD
  12588. *
  12589. * **Syntax**:\n
  12590. * ~~~
  12591. * URADD8 Rd, Rs1, Rs2
  12592. * ~~~
  12593. *
  12594. * **Purpose**:\n
  12595. * Do 8-bit unsigned integer element additions simultaneously. The results are halved to
  12596. * avoid overflow or saturation.
  12597. *
  12598. * **Description**:\n
  12599. * This instruction adds the 8-bit unsigned integer elements in Rs1 with the 8-bit
  12600. * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
  12601. * written to Rd.
  12602. *
  12603. * **Examples**:\n
  12604. * ~~~
  12605. * * Ra = 0x7F, Rb = 0x7F, Rt = 0x7F
  12606. * * Ra = 0x80, Rb = 0x80, Rt = 0x80
  12607. * * Ra = 0x40, Rb = 0x80, Rt = 0x60
  12608. * ~~~
  12609. *
  12610. * **Operations**:\n
  12611. * ~~~
  12612. * Rd.B[x] = (Rs1.B[x] + Rs2.B[x]) u>> 1;
  12613. * for RV32: x=3...0,
  12614. * for RV64: x=7...0
  12615. * ~~~
  12616. *
  12617. * \param [in] a unsigned long type of value stored in a
  12618. * \param [in] b unsigned long type of value stored in b
  12619. * \return value stored in unsigned long type
  12620. */
  12621. __STATIC_FORCEINLINE unsigned long __RV_URADD8(unsigned long a, unsigned long b)
  12622. {
  12623. unsigned long result;
  12624. __ASM volatile("uradd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12625. return result;
  12626. }
  12627. /* ===== Inline Function End for 3.174. URADD8 ===== */
  12628. /* ===== Inline Function Start for 3.175. URADD16 ===== */
  12629. /**
  12630. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12631. * \brief URADD16 (SIMD 16-bit Unsigned Halving Addition)
  12632. * \details
  12633. * **Type**: SIMD
  12634. *
  12635. * **Syntax**:\n
  12636. * ~~~
  12637. * URADD16 Rd, Rs1, Rs2
  12638. * ~~~
  12639. *
  12640. * **Purpose**:\n
  12641. * Do 16-bit unsigned integer element additions simultaneously. The results are halved to
  12642. * avoid overflow or saturation.
  12643. *
  12644. * **Description**:\n
  12645. * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit
  12646. * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
  12647. * written to Rd.
  12648. *
  12649. * **Examples**:\n
  12650. * ~~~
  12651. * * Ra = 0x7FFF, Rb = 0x7FFF Rt = 0x7FFF
  12652. * * Ra = 0x8000, Rb = 0x8000 Rt = 0x8000
  12653. * * Ra = 0x4000, Rb = 0x8000 Rt = 0x6000
  12654. * ~~~
  12655. *
  12656. * **Operations**:\n
  12657. * ~~~
  12658. * Rd.H[x] = (Rs1.H[x] + Rs2.H[x]) u>> 1;
  12659. * for RV32: x=1...0,
  12660. * for RV64: x=3...0
  12661. * ~~~
  12662. *
  12663. * \param [in] a unsigned long type of value stored in a
  12664. * \param [in] b unsigned long type of value stored in b
  12665. * \return value stored in unsigned long type
  12666. */
  12667. __STATIC_FORCEINLINE unsigned long __RV_URADD16(unsigned long a, unsigned long b)
  12668. {
  12669. unsigned long result;
  12670. __ASM volatile("uradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12671. return result;
  12672. }
  12673. /* ===== Inline Function End for 3.175. URADD16 ===== */
  12674. /* ===== Inline Function Start for 3.176. URADD64 ===== */
  12675. /**
  12676. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  12677. * \brief URADD64 (64-bit Unsigned Halving Addition)
  12678. * \details
  12679. * **Type**: DSP (64-bit Profile)
  12680. *
  12681. * **Syntax**:\n
  12682. * ~~~
  12683. * URADD64 Rd, Rs1, Rs2
  12684. * ~~~
  12685. *
  12686. * **Purpose**:\n
  12687. * Add two 64-bit unsigned integers. The result is halved to avoid overflow or saturation.
  12688. *
  12689. * **RV32 Description**:\n
  12690. * This instruction adds the 64-bit unsigned integer of an even/odd pair of registers
  12691. * specified by Rs1(4,1) with the 64-bit unsigned integer of an even/odd pair of registers specified by
  12692. * Rs2(4,1). The 64-bit addition result is first logically right-shifted by 1 bit and then written to an
  12693. * even/odd pair of registers specified by Rd(4,1).
  12694. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  12695. * includes register 2d and 2d+1.
  12696. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  12697. * of the pair contains the low 32-bit of the result.
  12698. *
  12699. * **RV64 Description**:\n
  12700. * This instruction adds the 64-bit unsigned integer in Rs1 with the 64-bit unsigned
  12701. * integer Rs2. The 64-bit addition result is first logically right-shifted by 1 bit and then written to Rd.
  12702. *
  12703. * **Operations**:\n
  12704. * ~~~
  12705. * * RV32:
  12706. * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
  12707. * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
  12708. * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
  12709. * R[t_H].R[t_L] = (R[a_H].R[a_L] + R[b_H].R[b_L]) u>> 1;
  12710. * * RV64:
  12711. * Rd = (Rs1 + Rs2) u>> 1;
  12712. * ~~~
  12713. *
  12714. * \param [in] a unsigned long long type of value stored in a
  12715. * \param [in] b unsigned long long type of value stored in b
  12716. * \return value stored in unsigned long long type
  12717. */
  12718. __STATIC_FORCEINLINE unsigned long long __RV_URADD64(unsigned long long a, unsigned long long b)
  12719. {
  12720. unsigned long long result;
  12721. __ASM volatile("uradd64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12722. return result;
  12723. }
  12724. /* ===== Inline Function End for 3.176. URADD64 ===== */
  12725. /* ===== Inline Function Start for 3.177. URADDW ===== */
  12726. /**
  12727. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  12728. * \brief URADDW (32-bit Unsigned Halving Addition)
  12729. * \details
  12730. * **Type**: DSP
  12731. *
  12732. * **Syntax**:\n
  12733. * ~~~
  12734. * URADDW Rd, Rs1, Rs2
  12735. * ~~~
  12736. *
  12737. * **Purpose**:\n
  12738. * Add 32-bit unsigned integers and the results are halved to avoid overflow or saturation.
  12739. *
  12740. * **Description**:\n
  12741. * This instruction adds the first 32-bit unsigned integer in Rs1 with the first 32-bit
  12742. * unsigned integer in Rs2. The result is first logically right-shifted by 1 bit and then sign-extended and
  12743. * written to Rd.
  12744. *
  12745. * **Examples**:\n
  12746. * ~~~
  12747. * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF Rt = 0x7FFFFFFF
  12748. * * Ra = 0x80000000, Rb = 0x80000000 Rt = 0x80000000
  12749. * * Ra = 0x40000000, Rb = 0x80000000 Rt = 0x60000000
  12750. * ~~~
  12751. *
  12752. * **Operations**:\n
  12753. * ~~~
  12754. * * RV32:
  12755. * Rd[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
  12756. * * RV64:
  12757. * resw[31:0] = (Rs1[31:0] + Rs2[31:0]) u>> 1;
  12758. * Rd[63:0] = SE(resw[31:0]);
  12759. * ~~~
  12760. *
  12761. * \param [in] a unsigned int type of value stored in a
  12762. * \param [in] b unsigned int type of value stored in b
  12763. * \return value stored in unsigned long type
  12764. */
  12765. __STATIC_FORCEINLINE unsigned long __RV_URADDW(unsigned int a, unsigned int b)
  12766. {
  12767. unsigned long result;
  12768. __ASM volatile("uraddw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12769. return result;
  12770. }
  12771. /* ===== Inline Function End for 3.177. URADDW ===== */
  12772. /* ===== Inline Function Start for 3.178. URCRAS16 ===== */
  12773. /**
  12774. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12775. * \brief URCRAS16 (SIMD 16-bit Unsigned Halving Cross Addition & Subtraction)
  12776. * \details
  12777. * **Type**: SIMD
  12778. *
  12779. * **Syntax**:\n
  12780. * ~~~
  12781. * URCRAS16 Rd, Rs1, Rs2
  12782. * ~~~
  12783. *
  12784. * **Purpose**:\n
  12785. * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
  12786. * subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  12787. * The results are halved to avoid overflow or saturation.
  12788. *
  12789. * **Description**:\n
  12790. * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
  12791. * with the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
  12792. * integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
  12793. * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
  12794. * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12795. *
  12796. * **Examples**:\n
  12797. * ~~~
  12798. * Please see `URADD16` and `URSUB16` instructions.
  12799. * ~~~
  12800. *
  12801. * **Operations**:\n
  12802. * ~~~
  12803. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) u>> 1;
  12804. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) u>> 1;
  12805. * for RV32, x=0
  12806. * for RV64, x=1...0
  12807. * ~~~
  12808. *
  12809. * \param [in] a unsigned long type of value stored in a
  12810. * \param [in] b unsigned long type of value stored in b
  12811. * \return value stored in unsigned long type
  12812. */
  12813. __STATIC_FORCEINLINE unsigned long __RV_URCRAS16(unsigned long a, unsigned long b)
  12814. {
  12815. unsigned long result;
  12816. __ASM volatile("urcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12817. return result;
  12818. }
  12819. /* ===== Inline Function End for 3.178. URCRAS16 ===== */
  12820. /* ===== Inline Function Start for 3.179. URCRSA16 ===== */
  12821. /**
  12822. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12823. * \brief URCRSA16 (SIMD 16-bit Unsigned Halving Cross Subtraction & Addition)
  12824. * \details
  12825. * **Type**: SIMD
  12826. *
  12827. * **Syntax**:\n
  12828. * ~~~
  12829. * URCRSA16 Rd, Rs1, Rs2
  12830. * ~~~
  12831. *
  12832. * **Purpose**:\n
  12833. * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
  12834. * addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  12835. * The results are halved to avoid overflow or saturation.
  12836. *
  12837. * **Description**:\n
  12838. * This instruction subtracts the 16-bit unsigned integer in [15:0] of 32-bit chunks in Rs2
  12839. * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
  12840. * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [31:16] of 32-bit chunks
  12841. * in Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
  12842. * chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12843. *
  12844. * **Examples**:\n
  12845. * ~~~
  12846. * Please see `URADD16` and `URSUB16` instructions.
  12847. * ~~~
  12848. *
  12849. * **Operations**:\n
  12850. * ~~~
  12851. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) u>> 1;
  12852. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) u>> 1;
  12853. * for RV32, x=0
  12854. * for RV64, x=1...0
  12855. * ~~~
  12856. *
  12857. * \param [in] a unsigned long type of value stored in a
  12858. * \param [in] b unsigned long type of value stored in b
  12859. * \return value stored in unsigned long type
  12860. */
  12861. __STATIC_FORCEINLINE unsigned long __RV_URCRSA16(unsigned long a, unsigned long b)
  12862. {
  12863. unsigned long result;
  12864. __ASM volatile("urcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12865. return result;
  12866. }
  12867. /* ===== Inline Function End for 3.179. URCRSA16 ===== */
  12868. /* ===== Inline Function Start for 3.180. URSTAS16 ===== */
  12869. /**
  12870. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12871. * \brief URSTAS16 (SIMD 16-bit Unsigned Halving Straight Addition & Subtraction)
  12872. * \details
  12873. * **Type**: SIMD
  12874. *
  12875. * **Syntax**:\n
  12876. * ~~~
  12877. * URSTAS16 Rd, Rs1, Rs2
  12878. * ~~~
  12879. *
  12880. * **Purpose**:\n
  12881. * Do 16-bit unsigned integer element addition and 16-bit unsigned integer element
  12882. * subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
  12883. * chunks. The results are halved to avoid overflow or saturation.
  12884. *
  12885. * **Description**:\n
  12886. * This instruction adds the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1
  12887. * with the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2, and subtracts the 16-bit unsigned
  12888. * integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit unsigned integer in [15:0] of 32-bit chunks
  12889. * in Rs1. The element results are first logically right-shifted by 1 bit and then written to [31:16] of 32-
  12890. * bit chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12891. *
  12892. * **Examples**:\n
  12893. * ~~~
  12894. * Please see `URADD16` and `URSUB16` instructions.
  12895. * ~~~
  12896. *
  12897. * **Operations**:\n
  12898. * ~~~
  12899. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][31:16]) u>> 1;
  12900. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][15:0]) u>> 1;
  12901. * for RV32, x=0
  12902. * for RV64, x=1...0
  12903. * ~~~
  12904. *
  12905. * \param [in] a unsigned long type of value stored in a
  12906. * \param [in] b unsigned long type of value stored in b
  12907. * \return value stored in unsigned long type
  12908. */
  12909. __STATIC_FORCEINLINE unsigned long __RV_URSTAS16(unsigned long a, unsigned long b)
  12910. {
  12911. unsigned long result;
  12912. __ASM volatile("urstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12913. return result;
  12914. }
  12915. /* ===== Inline Function End for 3.180. URSTAS16 ===== */
  12916. /* ===== Inline Function Start for 3.181. URSTSA16 ===== */
  12917. /**
  12918. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  12919. * \brief URSTSA16 (SIMD 16-bit Unsigned Halving Straight Subtraction & Addition)
  12920. * \details
  12921. * **Type**: SIMD
  12922. *
  12923. * **Syntax**:\n
  12924. * ~~~
  12925. * URSTSA16 Rd, Rs1, Rs2
  12926. * ~~~
  12927. *
  12928. * **Purpose**:\n
  12929. * Do 16-bit unsigned integer element subtraction and 16-bit unsigned integer element
  12930. * addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit
  12931. * chunks. The results are halved to avoid overflow or saturation.
  12932. *
  12933. * **Description**:\n
  12934. * This instruction subtracts the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs2
  12935. * from the 16-bit unsigned integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit unsigned
  12936. * integer in [15:0] of 32-bit chunks in Rs1 with the 16-bit unsigned integer in [15:0] of 32-bit chunks in
  12937. * Rs2. The two results are first logically right-shifted by 1 bit and then written to [31:16] of 32-bit
  12938. * chunks in Rd and [15:0] of 32-bit chunks in Rd.
  12939. *
  12940. * **Examples**:\n
  12941. * ~~~
  12942. * Please see `URADD16` and `URSUB16` instructions.
  12943. * ~~~
  12944. *
  12945. * **Operations**:\n
  12946. * ~~~
  12947. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][31:16]) u>> 1;
  12948. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][15:0]) u>> 1;
  12949. * for RV32, x=0
  12950. * for RV64, x=1...0
  12951. * ~~~
  12952. *
  12953. * \param [in] a unsigned long type of value stored in a
  12954. * \param [in] b unsigned long type of value stored in b
  12955. * \return value stored in unsigned long type
  12956. */
  12957. __STATIC_FORCEINLINE unsigned long __RV_URSTSA16(unsigned long a, unsigned long b)
  12958. {
  12959. unsigned long result;
  12960. __ASM volatile("urstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  12961. return result;
  12962. }
  12963. /* ===== Inline Function End for 3.181. URSTSA16 ===== */
  12964. /* ===== Inline Function Start for 3.182. URSUB8 ===== */
  12965. /**
  12966. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_ADDSUB
  12967. * \brief URSUB8 (SIMD 8-bit Unsigned Halving Subtraction)
  12968. * \details
  12969. * **Type**: SIMD
  12970. *
  12971. * **Syntax**:\n
  12972. * ~~~
  12973. * URSUB8 Rd, Rs1, Rs2
  12974. * ~~~
  12975. *
  12976. * **Purpose**:\n
  12977. * Do 8-bit unsigned integer element subtractions simultaneously. The results are halved to
  12978. * avoid overflow or saturation.
  12979. *
  12980. * **Description**:\n
  12981. * This instruction subtracts the 8-bit unsigned integer elements in Rs2 from the 8-bit
  12982. * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
  12983. * written to Rd.
  12984. *
  12985. * **Examples**:\n
  12986. * ~~~
  12987. * * Ra = 0x7F, Rb = 0x80 Rt = 0xFF
  12988. * * Ra = 0x80, Rb = 0x7F Rt = 0x00
  12989. * * Ra = 0x80, Rb = 0x40 Rt = 0x20
  12990. * ~~~
  12991. *
  12992. * **Operations**:\n
  12993. * ~~~
  12994. * Rd.B[x] = (Rs1.B[x] - Rs2.B[x]) u>> 1;
  12995. * for RV32: x=3...0,
  12996. * for RV64: x=7...0
  12997. * ~~~
  12998. *
  12999. * \param [in] a unsigned long type of value stored in a
  13000. * \param [in] b unsigned long type of value stored in b
  13001. * \return value stored in unsigned long type
  13002. */
  13003. __STATIC_FORCEINLINE unsigned long __RV_URSUB8(unsigned long a, unsigned long b)
  13004. {
  13005. unsigned long result;
  13006. __ASM volatile("ursub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13007. return result;
  13008. }
  13009. /* ===== Inline Function End for 3.182. URSUB8 ===== */
  13010. /* ===== Inline Function Start for 3.183. URSUB16 ===== */
  13011. /**
  13012. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_16B_ADDSUB
  13013. * \brief URSUB16 (SIMD 16-bit Unsigned Halving Subtraction)
  13014. * \details
  13015. * **Type**: SIMD
  13016. *
  13017. * **Syntax**:\n
  13018. * ~~~
  13019. * URSUB16 Rd, Rs1, Rs2
  13020. * ~~~
  13021. *
  13022. * **Purpose**:\n
  13023. * Do 16-bit unsigned integer element subtractions simultaneously. The results are halved to
  13024. * avoid overflow or saturation.
  13025. *
  13026. * **Description**:\n
  13027. * This instruction subtracts the 16-bit unsigned integer elements in Rs2 from the 16-bit
  13028. * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
  13029. * written to Rd.
  13030. *
  13031. * **Examples**:\n
  13032. * ~~~
  13033. * * Ra = 0x7FFF, Rb = 0x8000 Rt = 0xFFFF
  13034. * * Ra = 0x8000, Rb = 0x7FFF Rt = 0x0000
  13035. * * Ra = 0x8000, Rb = 0x4000 Rt = 0x2000
  13036. * ~~~
  13037. *
  13038. * **Operations**:\n
  13039. * ~~~
  13040. * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) u>> 1;
  13041. * for RV32: x=1...0,
  13042. * for RV64: x=3...0
  13043. * ~~~
  13044. *
  13045. * \param [in] a unsigned long type of value stored in a
  13046. * \param [in] b unsigned long type of value stored in b
  13047. * \return value stored in unsigned long type
  13048. */
  13049. __STATIC_FORCEINLINE unsigned long __RV_URSUB16(unsigned long a, unsigned long b)
  13050. {
  13051. unsigned long result;
  13052. __ASM volatile("ursub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13053. return result;
  13054. }
  13055. /* ===== Inline Function End for 3.183. URSUB16 ===== */
  13056. /* ===== Inline Function Start for 3.184. URSUB64 ===== */
  13057. /**
  13058. * \ingroup NMSIS_Core_DSP_Intrinsic_64B_ADDSUB
  13059. * \brief URSUB64 (64-bit Unsigned Halving Subtraction)
  13060. * \details
  13061. * **Type**: DSP (64-bit Profile)
  13062. *
  13063. * **Syntax**:\n
  13064. * ~~~
  13065. * URSUB64 Rd, Rs1, Rs2
  13066. * ~~~
  13067. *
  13068. * **Purpose**:\n
  13069. * Perform a 64-bit unsigned integer subtraction. The result is halved to avoid overflow or
  13070. * saturation.
  13071. *
  13072. * **RV32 Description**:\n
  13073. * This instruction subtracts the 64-bit unsigned integer of an even/odd pair of
  13074. * registers specified by Rs2(4,1) from the 64-bit unsigned integer of an even/odd pair of registers
  13075. * specified by Rs1(4,1). The subtraction result is first logically right-shifted by 1 bit and then written
  13076. * to an even/odd pair of registers specified by Rd(4,1).
  13077. * Rx(4,1), i.e., d, determines the even/odd pair group of two registers. Specifically, the register pair
  13078. * includes register 2d and 2d+1.
  13079. * The odd `2d+1` register of the pair contains the high 32-bit of the result and the even `2d` register
  13080. * of the pair contains the low 32-bit of the result.
  13081. *
  13082. * **RV64 Description**:\n
  13083. * This instruction subtracts the 64-bit unsigned integer in Rs2 from the 64-bit
  13084. * unsigned integer in Rs1. The subtraction result is first logically right-shifted by 1 bit and then
  13085. * written to Rd.
  13086. *
  13087. * **Operations**:\n
  13088. * ~~~
  13089. * * RV32:
  13090. * t_L = CONCAT(Rt(4,1),1'b0); t_H = CONCAT(Rt(4,1),1'b1);
  13091. * a_L = CONCAT(Ra(4,1),1'b0); a_H = CONCAT(Ra(4,1),1'b1);
  13092. * b_L = CONCAT(Rb(4,1),1'b0); b_H = CONCAT(Rb(4,1),1'b1);
  13093. * R[t_H].R[t_L] = (R[a_H].R[a_L] - R[b_H].R[b_L]) u>> 1;
  13094. * * RV64:
  13095. * Rd = (Rs1 - Rs2) u>> 1;
  13096. * ~~~
  13097. *
  13098. * \param [in] a unsigned long long type of value stored in a
  13099. * \param [in] b unsigned long long type of value stored in b
  13100. * \return value stored in unsigned long long type
  13101. */
  13102. __STATIC_FORCEINLINE unsigned long long __RV_URSUB64(unsigned long long a, unsigned long long b)
  13103. {
  13104. unsigned long long result;
  13105. __ASM volatile("ursub64 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13106. return result;
  13107. }
  13108. /* ===== Inline Function End for 3.184. URSUB64 ===== */
  13109. /* ===== Inline Function Start for 3.185. URSUBW ===== */
  13110. /**
  13111. * \ingroup NMSIS_Core_DSP_Intrinsic_32B_COMPUTATION
  13112. * \brief URSUBW (32-bit Unsigned Halving Subtraction)
  13113. * \details
  13114. * **Type**: DSP
  13115. *
  13116. * **Syntax**:\n
  13117. * ~~~
  13118. * URSUBW Rd, Rs1, Rs2
  13119. * ~~~
  13120. *
  13121. * **Purpose**:\n
  13122. * Subtract 32-bit unsigned integers and the result is halved to avoid overflow or saturation.
  13123. *
  13124. * **Description**:\n
  13125. * This instruction subtracts the first 32-bit unsigned integer in Rs2 from the first 32-bit
  13126. * unsigned integer in Rs1. The result is first logically right-shifted by 1 bit and then sign-extended and
  13127. * written to Rd.
  13128. *
  13129. * **Examples**:\n
  13130. * ~~~
  13131. * * Ra = 0x7FFFFFFF, Rb = 0x80000000 Rt = 0xFFFFFFFF
  13132. * * Ra = 0x80000000, Rb = 0x7FFFFFFF Rt = 0x00000000
  13133. * * Ra = 0x80000000, Rb = 0x40000000 Rt = 0x20000000
  13134. * ~~~
  13135. *
  13136. * **Operations**:\n
  13137. * ~~~
  13138. * * RV32:
  13139. * Rd[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
  13140. * * RV64:
  13141. * resw[31:0] = (Rs1[31:0] - Rs2[31:0]) u>> 1;
  13142. * Rd[63:0] = SE(resw[31:0]);
  13143. * ~~~
  13144. *
  13145. * \param [in] a unsigned int type of value stored in a
  13146. * \param [in] b unsigned int type of value stored in b
  13147. * \return value stored in unsigned long type
  13148. */
  13149. __STATIC_FORCEINLINE unsigned long __RV_URSUBW(unsigned int a, unsigned int b)
  13150. {
  13151. unsigned long result;
  13152. __ASM volatile("ursubw %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13153. return result;
  13154. }
  13155. /* ===== Inline Function End for 3.185. URSUBW ===== */
  13156. /* ===== Inline Function Start for 3.186. WEXTI ===== */
  13157. /**
  13158. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  13159. * \brief WEXTI (Extract Word from 64-bit Immediate)
  13160. * \details
  13161. * **Type**: DSP
  13162. *
  13163. * **Syntax**:\n
  13164. * ~~~
  13165. * WEXTI Rd, Rs1, #LSBloc
  13166. * ~~~
  13167. *
  13168. * **Purpose**:\n
  13169. * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
  13170. * a register (RV64) starting from a specified immediate LSB bit position.
  13171. *
  13172. * **RV32 Description**:\n
  13173. * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
  13174. * by Rs1(4,1) starting from a specified immediate LSB bit position, #LSBloc. The extracted word is
  13175. * written to Rd.
  13176. * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
  13177. * pair includes register 2d and 2d+1.
  13178. * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
  13179. * register of the pair contains the low 32-bit of the 64-bit value.
  13180. *
  13181. * **RV64 Description**:\n
  13182. * This instruction extracts a 32-bit word from a 64-bit value in Rs1 starting from a specified
  13183. * immediate LSB bit position, #LSBloc. The extracted word is sign-extended and written to lower 32-
  13184. * bit of Rd.
  13185. *
  13186. * **Operations**:\n
  13187. * ~~~
  13188. * * RV32:
  13189. * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
  13190. * src[63:0] = Concat(R[Idx1], R[Idx0]);
  13191. * Rd = src[31+LSBloc:LSBloc];
  13192. * * RV64:
  13193. * ExtractW = Rs1[31+LSBloc:LSBloc];
  13194. * Rd = SE(ExtractW)
  13195. * ~~~
  13196. *
  13197. * \param [in] a long long type of value stored in a
  13198. * \param [in] b unsigned int type of value stored in b
  13199. * \return value stored in unsigned long type
  13200. */
  13201. #define __RV_WEXTI(a, b) \
  13202. ({ \
  13203. unsigned long result; \
  13204. long long __a = (long long)(a); \
  13205. __ASM volatile("wexti %0, %1, %2" : "=r"(result) : "r"(__a), "K"(b)); \
  13206. result; \
  13207. })
  13208. /* ===== Inline Function End for 3.186. WEXTI ===== */
  13209. /* ===== Inline Function Start for 3.187. WEXT ===== */
  13210. /**
  13211. * \ingroup NMSIS_Core_DSP_Intrinsic_NON_SIMD_MISC
  13212. * \brief WEXT (Extract Word from 64-bit)
  13213. * \details
  13214. * **Type**: DSP
  13215. *
  13216. * **Syntax**:\n
  13217. * ~~~
  13218. * WEXT Rd, Rs1, Rs2
  13219. * ~~~
  13220. *
  13221. * **Purpose**:\n
  13222. * Extract a 32-bit word from a 64-bit value stored in an even/odd pair of registers (RV32) or
  13223. * a register (RV64) starting from a specified LSB bit position in a register.
  13224. *
  13225. * **RV32 Description**:\n
  13226. * This instruction extracts a 32-bit word from a 64-bit value of an even/odd pair of registers specified
  13227. * by Rs1(4,1) starting from a specified LSB bit position, specified in Rs2[4:0]. The extracted word is
  13228. * written to Rd.
  13229. * Rs1(4,1), i.e., d, determines the even/odd pair group of the two registers. Specifically, the register
  13230. * pair includes register 2d and 2d+1.
  13231. * The odd `2d+1` register of the pair contains the high 32-bit of the 64-bit value and the even `2d`
  13232. * register of the pair contains the low 32-bit of the 64-bit value.
  13233. *
  13234. * **Operations**:\n
  13235. * ~~~
  13236. * * RV32:
  13237. * Idx0 = CONCAT(Rs1(4,1),1'b0); Idx1 = CONCAT(Rs1(4,1),1'b1);
  13238. * src[63:0] = Concat(R[Idx1], R[Idx0]);
  13239. * LSBloc = Rs2[4:0];
  13240. * Rd = src[31+LSBloc:LSBloc];
  13241. * * RV64:
  13242. * LSBloc = Rs2[4:0];
  13243. * ExtractW = Rs1[31+LSBloc:LSBloc];
  13244. * Rd = SE(ExtractW)
  13245. * ~~~
  13246. *
  13247. * \param [in] a long long type of value stored in a
  13248. * \param [in] b unsigned int type of value stored in b
  13249. * \return value stored in unsigned long type
  13250. */
  13251. __STATIC_FORCEINLINE unsigned long __RV_WEXT(long long a, unsigned int b)
  13252. {
  13253. unsigned long result;
  13254. __ASM volatile("wext %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13255. return result;
  13256. }
  13257. /* ===== Inline Function End for 3.187. WEXT ===== */
  13258. /* ===== Inline Function Start for 3.188.1. ZUNPKD810 ===== */
  13259. /**
  13260. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13261. * \brief ZUNPKD810 (Unsigned Unpacking Bytes 1 & 0)
  13262. * \details
  13263. * **Type**: DSP
  13264. *
  13265. * **Syntax**:\n
  13266. * ~~~
  13267. * ZUNPKD8xy Rd, Rs1
  13268. * xy = {10, 20, 30, 31, 32}
  13269. * ~~~
  13270. *
  13271. * **Purpose**:\n
  13272. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13273. * halfwords of 32-bit chunks in a register.
  13274. *
  13275. * **Description**:\n
  13276. * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  13277. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13278. * chunks in Rd.
  13279. *
  13280. * **Operations**:\n
  13281. * ~~~
  13282. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13283. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13284. * // ZUNPKD810, x=1,y=0
  13285. * // ZUNPKD820, x=2,y=0
  13286. * // ZUNPKD830, x=3,y=0
  13287. * // ZUNPKD831, x=3,y=1
  13288. * // ZUNPKD832, x=3,y=2
  13289. * for RV32: m=0,
  13290. * for RV64: m=1...0
  13291. * ~~~
  13292. *
  13293. * \param [in] a unsigned long type of value stored in a
  13294. * \return value stored in unsigned long type
  13295. */
  13296. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD810(unsigned long a)
  13297. {
  13298. unsigned long result;
  13299. __ASM volatile("zunpkd810 %0, %1" : "=r"(result) : "r"(a));
  13300. return result;
  13301. }
  13302. /* ===== Inline Function End for 3.188.1. ZUNPKD810 ===== */
  13303. /* ===== Inline Function Start for 3.188.2. ZUNPKD820 ===== */
  13304. /**
  13305. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13306. * \brief ZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)
  13307. * \details
  13308. * **Type**: DSP
  13309. *
  13310. * **Syntax**:\n
  13311. * ~~~
  13312. * ZUNPKD8xy Rd, Rs1
  13313. * xy = {10, 20, 30, 31, 32}
  13314. * ~~~
  13315. *
  13316. * **Purpose**:\n
  13317. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13318. * halfwords of 32-bit chunks in a register.
  13319. *
  13320. * **Description**:\n
  13321. * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  13322. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13323. * chunks in Rd.
  13324. *
  13325. * **Operations**:\n
  13326. * ~~~
  13327. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13328. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13329. * // ZUNPKD810, x=1,y=0
  13330. * // ZUNPKD820, x=2,y=0
  13331. * // ZUNPKD830, x=3,y=0
  13332. * // ZUNPKD831, x=3,y=1
  13333. * // ZUNPKD832, x=3,y=2
  13334. * for RV32: m=0,
  13335. * for RV64: m=1...0
  13336. * ~~~
  13337. *
  13338. * \param [in] a unsigned long type of value stored in a
  13339. * \return value stored in unsigned long type
  13340. */
  13341. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD820(unsigned long a)
  13342. {
  13343. unsigned long result;
  13344. __ASM volatile("zunpkd820 %0, %1" : "=r"(result) : "r"(a));
  13345. return result;
  13346. }
  13347. /* ===== Inline Function End for 3.188.2. ZUNPKD820 ===== */
  13348. /* ===== Inline Function Start for 3.188.3. ZUNPKD830 ===== */
  13349. /**
  13350. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13351. * \brief ZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)
  13352. * \details
  13353. * **Type**: DSP
  13354. *
  13355. * **Syntax**:\n
  13356. * ~~~
  13357. * ZUNPKD8xy Rd, Rs1
  13358. * xy = {10, 20, 30, 31, 32}
  13359. * ~~~
  13360. *
  13361. * **Purpose**:\n
  13362. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13363. * halfwords of 32-bit chunks in a register.
  13364. *
  13365. * **Description**:\n
  13366. * For the `ZUNPKD8(x)(*y*)` instruction, it unpacks byte *x and byte y* of 32-bit chunks in Rs1 into
  13367. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13368. * chunks in Rd.
  13369. *
  13370. * **Operations**:\n
  13371. * ~~~
  13372. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13373. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13374. * // ZUNPKD810, x=1,y=0
  13375. * // ZUNPKD820, x=2,y=0
  13376. * // ZUNPKD830, x=3,y=0
  13377. * // ZUNPKD831, x=3,y=1
  13378. * // ZUNPKD832, x=3,y=2
  13379. * for RV32: m=0,
  13380. * for RV64: m=1...0
  13381. * ~~~
  13382. *
  13383. * \param [in] a unsigned long type of value stored in a
  13384. * \return value stored in unsigned long type
  13385. */
  13386. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD830(unsigned long a)
  13387. {
  13388. unsigned long result;
  13389. __ASM volatile("zunpkd830 %0, %1" : "=r"(result) : "r"(a));
  13390. return result;
  13391. }
  13392. /* ===== Inline Function End for 3.188.3. ZUNPKD830 ===== */
  13393. /* ===== Inline Function Start for 3.188.4. ZUNPKD831 ===== */
  13394. /**
  13395. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13396. * \brief ZUNPKD831 (Unsigned Unpacking Bytes 3 & 1)
  13397. * \details
  13398. * **Type**: DSP
  13399. *
  13400. * **Syntax**:\n
  13401. * ~~~
  13402. * ZUNPKD8xy Rd, Rs1
  13403. * xy = {10, 20, 30, 31, 32}
  13404. * ~~~
  13405. *
  13406. * **Purpose**:\n
  13407. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13408. * halfwords of 32-bit chunks in a register.
  13409. *
  13410. * **Description**:\n
  * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  13412. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13413. * chunks in Rd.
  13414. *
  13415. * **Operations**:\n
  13416. * ~~~
  13417. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13418. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13419. * // ZUNPKD810, x=1,y=0
  13420. * // ZUNPKD820, x=2,y=0
  13421. * // ZUNPKD830, x=3,y=0
  13422. * // ZUNPKD831, x=3,y=1
  13423. * // ZUNPKD832, x=3,y=2
  13424. * for RV32: m=0,
  13425. * for RV64: m=1...0
  13426. * ~~~
  13427. *
  13428. * \param [in] a unsigned long type of value stored in a
  13429. * \return value stored in unsigned long type
  13430. */
  13431. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD831(unsigned long a)
  13432. {
  13433. unsigned long result;
  13434. __ASM volatile("zunpkd831 %0, %1" : "=r"(result) : "r"(a));
  13435. return result;
  13436. }
  13437. /* ===== Inline Function End for 3.188.4. ZUNPKD831 ===== */
  13438. /* ===== Inline Function Start for 3.188.5. ZUNPKD832 ===== */
  13439. /**
  13440. * \ingroup NMSIS_Core_DSP_Intrinsic_SIMD_8B_UNPACK
  13441. * \brief ZUNPKD832 (Unsigned Unpacking Bytes 3 & 2)
  13442. * \details
  13443. * **Type**: DSP
  13444. *
  13445. * **Syntax**:\n
  13446. * ~~~
  13447. * ZUNPKD8xy Rd, Rs1
  13448. * xy = {10, 20, 30, 31, 32}
  13449. * ~~~
  13450. *
  13451. * **Purpose**:\n
  13452. * Unpack byte x and byte y of 32-bit chunks in a register into two 16-bit unsigned
  13453. * halfwords of 32-bit chunks in a register.
  13454. *
  13455. * **Description**:\n
  * For the `ZUNPKD8xy` instruction, it unpacks byte *x* and byte *y* of 32-bit chunks in Rs1 into
  13457. * two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit
  13458. * chunks in Rd.
  13459. *
  13460. * **Operations**:\n
  13461. * ~~~
  13462. * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[x])
  13463. * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[y])
  13464. * // ZUNPKD810, x=1,y=0
  13465. * // ZUNPKD820, x=2,y=0
  13466. * // ZUNPKD830, x=3,y=0
  13467. * // ZUNPKD831, x=3,y=1
  13468. * // ZUNPKD832, x=3,y=2
  13469. * for RV32: m=0,
  13470. * for RV64: m=1...0
  13471. * ~~~
  13472. *
  13473. * \param [in] a unsigned long type of value stored in a
  13474. * \return value stored in unsigned long type
  13475. */
  13476. __STATIC_FORCEINLINE unsigned long __RV_ZUNPKD832(unsigned long a)
  13477. {
  13478. unsigned long result;
  13479. __ASM volatile("zunpkd832 %0, %1" : "=r"(result) : "r"(a));
  13480. return result;
  13481. }
  13482. /* ===== Inline Function End for 3.188.5. ZUNPKD832 ===== */
  13483. #if (__RISCV_XLEN == 64) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
  13484. /* ===== Inline Function Start for 4.1. ADD32 ===== */
  13485. /**
  13486. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13487. * \brief ADD32 (SIMD 32-bit Addition)
  13488. * \details
  13489. * **Type**: SIMD (RV64 Only)
  13490. *
  13491. * **Syntax**:\n
  13492. * ~~~
  13493. * ADD32 Rd, Rs1, Rs2
  13494. * ~~~
  13495. *
  13496. * **Purpose**:\n
  13497. * Do 32-bit integer element additions simultaneously.
  13498. *
  13499. * **Description**:\n
  13500. * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer
  13501. * elements in Rs2, and then writes the 32-bit element results to Rd.
  13502. *
  13503. * **Note**:\n
  13504. * This instruction can be used for either signed or unsigned addition.
  13505. *
  13506. * **Operations**:\n
  13507. * ~~~
  13508. * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
  13509. * for RV64: x=1...0
  13510. * ~~~
  13511. *
  13512. * \param [in] a unsigned long type of value stored in a
  13513. * \param [in] b unsigned long type of value stored in b
  13514. * \return value stored in unsigned long type
  13515. */
  13516. __STATIC_FORCEINLINE unsigned long __RV_ADD32(unsigned long a, unsigned long b)
  13517. {
  13518. unsigned long result;
  13519. __ASM volatile("add32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13520. return result;
  13521. }
  13522. /* ===== Inline Function End for 4.1. ADD32 ===== */
  13523. /* ===== Inline Function Start for 4.2. CRAS32 ===== */
  13524. /**
  13525. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13526. * \brief CRAS32 (SIMD 32-bit Cross Addition & Subtraction)
  13527. * \details
  13528. * **Type**: SIMD (RV64 Only)
  13529. *
  13530. * **Syntax**:\n
  13531. * ~~~
  13532. * CRAS32 Rd, Rs1, Rs2
  13533. * ~~~
  13534. *
  13535. * **Purpose**:\n
  13536. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
  13537. * chunk simultaneously. Operands are from crossed 32-bit elements.
  13538. *
  13539. * **Description**:\n
  13540. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  13541. * integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
  13542. * the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
  13543. * writes the result to [31:0] of Rd.
  13544. *
  13545. * **Note**:\n
  13546. * This instruction can be used for either signed or unsigned operations.
  13547. *
  13548. * **Operations**:\n
  13549. * ~~~
  13550. * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
  13551. * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
  13552. * ~~~
  13553. *
  13554. * \param [in] a unsigned long type of value stored in a
  13555. * \param [in] b unsigned long type of value stored in b
  13556. * \return value stored in unsigned long type
  13557. */
  13558. __STATIC_FORCEINLINE unsigned long __RV_CRAS32(unsigned long a, unsigned long b)
  13559. {
  13560. unsigned long result;
  13561. __ASM volatile("cras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13562. return result;
  13563. }
  13564. /* ===== Inline Function End for 4.2. CRAS32 ===== */
  13565. /* ===== Inline Function Start for 4.3. CRSA32 ===== */
  13566. /**
  13567. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13568. * \brief CRSA32 (SIMD 32-bit Cross Subtraction & Addition)
  13569. * \details
  13570. * **Type**: SIMD (RV64 Only)
  13571. *
  13572. * **Syntax**:\n
  13573. * ~~~
  13574. * CRSA32 Rd, Rs1, Rs2
  13575. * ~~~
  13576. *
  13577. * **Purpose**:\n
  13578. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
  13579. * chunk simultaneously. Operands are from crossed 32-bit elements.
  *
  * **Description**:\n
  13581. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
  13582. * in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer
  13583. * element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to
  13584. * [31:0] of Rd
  13585. *
  13586. * **Note**:\n
  13587. * This instruction can be used for either signed or unsigned operations.
  13588. *
  13589. * **Operations**:\n
  13590. * ~~~
  13591. * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
  13592. * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
  13593. * ~~~
  13594. *
  13595. * \param [in] a unsigned long type of value stored in a
  13596. * \param [in] b unsigned long type of value stored in b
  13597. * \return value stored in unsigned long type
  13598. */
  13599. __STATIC_FORCEINLINE unsigned long __RV_CRSA32(unsigned long a, unsigned long b)
  13600. {
  13601. unsigned long result;
  13602. __ASM volatile("crsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13603. return result;
  13604. }
  13605. /* ===== Inline Function End for 4.3. CRSA32 ===== */
  13606. /* ===== Inline Function Start for 4.4. KABS32 ===== */
  13607. /**
  13608. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  13609. * \brief KABS32 (Scalar 32-bit Absolute Value with Saturation)
  13610. * \details
  13611. * **Type**: DSP (RV64 Only)
  *
  * **Format** (only the fields listed in this comment; bits above 24 are not given here):\n
  * ~~~
  * | 24..20         | 19..15 | 14..12 | 11..7 | 6..0            |
  * | KABS32 (10010) | Rs1    | 000    | Rd    | GE80B (1111111) |
  * ~~~
  13624. *
  13625. * **Syntax**:\n
  13626. * ~~~
  13627. * KABS32 Rd, Rs1
  13628. * ~~~
  13629. *
  13630. * **Purpose**:\n
  13631. * Get the absolute value of signed 32-bit integer elements in a general register.
  13632. *
  13633. * **Description**:\n
  13634. * This instruction calculates the absolute value of signed 32-bit integer elements stored
  13635. * in Rs1. The results are written to Rd. This instruction with the minimum negative integer input of
  13636. * 0x80000000 will produce a saturated output of maximum positive integer of 0x7fffffff and the OV
  13637. * flag will be set to 1.
  13638. *
  13639. * **Operations**:\n
  13640. * ~~~
  13641. * if (Rs1.W[x] >= 0) {
  13642. * res[x] = Rs1.W[x];
  13643. * } else {
  * if (Rs1.W[x] == 0x80000000) {
  13645. * res[x] = 0x7fffffff;
  13646. * OV = 1;
  13647. * } else {
  13648. * res[x] = -Rs1.W[x];
  13649. * }
  13650. * }
  13651. * Rd.W[x] = res[x];
  13652. * for RV64: x=1...0
  13653. * ~~~
  13654. *
  13655. * \param [in] a unsigned long type of value stored in a
  13656. * \return value stored in unsigned long type
  13657. */
  13658. __STATIC_FORCEINLINE unsigned long __RV_KABS32(unsigned long a)
  13659. {
  13660. unsigned long result;
  13661. __ASM volatile("kabs32 %0, %1" : "=r"(result) : "r"(a));
  13662. return result;
  13663. }
  13664. /* ===== Inline Function End for 4.4. KABS32 ===== */
  13665. /* ===== Inline Function Start for 4.5. KADD32 ===== */
  13666. /**
  13667. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13668. * \brief KADD32 (SIMD 32-bit Signed Saturating Addition)
  13669. * \details
  13670. * **Type**: SIMD (RV64 Only)
  13671. *
  13672. * **Syntax**:\n
  13673. * ~~~
  13674. * KADD32 Rd, Rs1, Rs2
  13675. * ~~~
  13676. *
  13677. * **Purpose**:\n
  13678. * Do 32-bit signed integer element saturating additions simultaneously.
  13679. *
  13680. * **Description**:\n
  13681. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
  13682. * integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1),
  13683. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  13684. *
  13685. * **Operations**:\n
  13686. * ~~~
  13687. * res[x] = Rs1.W[x] + Rs2.W[x];
  13688. * if (res[x] > (2^31)-1) {
  13689. * res[x] = (2^31)-1;
  13690. * OV = 1;
  13691. * } else if (res[x] < -2^31) {
  13692. * res[x] = -2^31;
  13693. * OV = 1;
  13694. * }
  13695. * Rd.W[x] = res[x];
  13696. * for RV64: x=1...0
  13697. * ~~~
  13698. *
  13699. * \param [in] a unsigned long type of value stored in a
  13700. * \param [in] b unsigned long type of value stored in b
  13701. * \return value stored in unsigned long type
  13702. */
  13703. __STATIC_FORCEINLINE unsigned long __RV_KADD32(unsigned long a, unsigned long b)
  13704. {
  13705. unsigned long result;
  13706. __ASM volatile("kadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13707. return result;
  13708. }
  13709. /* ===== Inline Function End for 4.5. KADD32 ===== */
  13710. /* ===== Inline Function Start for 4.6. KCRAS32 ===== */
  13711. /**
  13712. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13713. * \brief KCRAS32 (SIMD 32-bit Signed Saturating Cross Addition & Subtraction)
  13714. * \details
  * **Type**: SIMD (RV64 Only)
  13716. *
  13717. * **Syntax**:\n
  13718. * ~~~
  13719. * KCRAS32 Rd, Rs1, Rs2
  13720. * ~~~
  13721. *
  13722. * **Purpose**:\n
  13723. * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
  13724. * saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
  13725. *
  13726. * **Description**:\n
  13727. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  13728. * integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit integer element in [63:32] of
  13729. * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
  13730. * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
  13731. * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
  13732. *
  13733. * **Operations**:\n
  13734. * ~~~
  13735. * res[1] = Rs1.W[1] + Rs2.W[0];
  13736. * res[0] = Rs1.W[0] - Rs2.W[1];
  13737. * if (res[x] > (2^31)-1) {
  13738. * res[x] = (2^31)-1;
  13739. * OV = 1;
  * } else if (res[x] < -2^31) {
  13741. * res[x] = -2^31;
  13742. * OV = 1;
  13743. * }
  13744. * Rd.W[1] = res[1];
  13745. * Rd.W[0] = res[0];
  13746. * for RV64, x=1...0
  13747. * ~~~
  13748. *
  13749. * \param [in] a unsigned long type of value stored in a
  13750. * \param [in] b unsigned long type of value stored in b
  13751. * \return value stored in unsigned long type
  13752. */
  13753. __STATIC_FORCEINLINE unsigned long __RV_KCRAS32(unsigned long a, unsigned long b)
  13754. {
  13755. unsigned long result;
  13756. __ASM volatile("kcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13757. return result;
  13758. }
  13759. /* ===== Inline Function End for 4.6. KCRAS32 ===== */
  13760. /* ===== Inline Function Start for 4.7. KCRSA32 ===== */
  13761. /**
  13762. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  13763. * \brief KCRSA32 (SIMD 32-bit Signed Saturating Cross Subtraction & Addition)
  13764. * \details
  13765. * **Type**: SIMD (RV64 Only)
  13766. *
  13767. * **Syntax**:\n
  13768. * ~~~
  13769. * KCRSA32 Rd, Rs1, Rs2
  13770. * ~~~
  13771. *
  13772. * **Purpose**:\n
  13773. * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
  13774. * saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.
  *
  * **Description**:\n
  13776. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element
  13777. * in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
  13778. * integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31
  13779. * <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  13780. * [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  13781. *
  13782. * **Operations**:\n
  13783. * ~~~
  13784. * res[1] = Rs1.W[1] - Rs2.W[0];
  13785. * res[0] = Rs1.W[0] + Rs2.W[1];
  13786. * if (res[x] > (2^31)-1) {
  13787. * res[x] = (2^31)-1;
  13788. * OV = 1;
  * } else if (res[x] < -2^31) {
  13790. * res[x] = -2^31;
  13791. * OV = 1;
  13792. * }
  13793. * Rd.W[1] = res[1];
  13794. * Rd.W[0] = res[0];
  13795. * for RV64, x=1...0
  13796. * ~~~
  13797. *
  13798. * \param [in] a unsigned long type of value stored in a
  13799. * \param [in] b unsigned long type of value stored in b
  13800. * \return value stored in unsigned long type
  13801. */
  13802. __STATIC_FORCEINLINE unsigned long __RV_KCRSA32(unsigned long a, unsigned long b)
  13803. {
  13804. unsigned long result;
  13805. __ASM volatile("kcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13806. return result;
  13807. }
  13808. /* ===== Inline Function End for 4.7. KCRSA32 ===== */
  13809. /* ===== Inline Function Start for 4.8.1. KDMBB16 ===== */
  13810. /**
  13811. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13812. * \brief KDMBB16 (SIMD Signed Saturating Double Multiply B16 x B16)
  13813. * \details
  13814. * **Type**: SIMD (RV64 only)
  13815. *
  13816. * **Syntax**:\n
  13817. * ~~~
  13818. * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13819. * ~~~
  13820. *
  13821. * **Purpose**:\n
  13822. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13823. * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
  13824. * in the destination register. If saturation happens, an overflow flag OV will be set.
  13825. *
  13826. * **Description**:\n
  13827. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13828. * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
  13829. * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
  13830. * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
  13831. * and the overflow flag OV will be set.
  13832. *
  13833. * **Operations**:\n
  13834. * ~~~
  13835. * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
  13836. * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
  13837. * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
  13838. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13839. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13840. * Mresult[z] = aop[z] * bop[z];
  13841. * resQ31[z] = Mresult[z] << 1;
  13842. * } else {
  13843. * resQ31[z] = 0x7FFFFFFF;
  13844. * OV = 1;
  13845. * }
  13846. * Rd.W[z] = resQ31[z];
  13847. * ~~~
  13848. *
  13849. * \param [in] a unsigned long type of value stored in a
  13850. * \param [in] b unsigned long type of value stored in b
  13851. * \return value stored in unsigned long type
  13852. */
  13853. __STATIC_FORCEINLINE unsigned long __RV_KDMBB16(unsigned long a, unsigned long b)
  13854. {
  13855. unsigned long result;
  13856. __ASM volatile("kdmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13857. return result;
  13858. }
  13859. /* ===== Inline Function End for 4.8.1. KDMBB16 ===== */
  13860. /* ===== Inline Function Start for 4.8.2. KDMBT16 ===== */
  13861. /**
  13862. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13863. * \brief KDMBT16 (SIMD Signed Saturating Double Multiply B16 x T16)
  13864. * \details
  13865. * **Type**: SIMD (RV64 only)
  13866. *
  13867. * **Syntax**:\n
  13868. * ~~~
  13869. * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13870. * ~~~
  13871. *
  13872. * **Purpose**:\n
  13873. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13874. * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
  13875. * in the destination register. If saturation happens, an overflow flag OV will be set.
  13876. *
  13877. * **Description**:\n
  13878. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13879. * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
  13880. * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
  13881. * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
  13882. * and the overflow flag OV will be set.
  13883. *
  13884. * **Operations**:\n
  13885. * ~~~
  13886. * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
  13887. * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
  13888. * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
  13889. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13890. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13891. * Mresult[z] = aop[z] * bop[z];
  13892. * resQ31[z] = Mresult[z] << 1;
  13893. * } else {
  13894. * resQ31[z] = 0x7FFFFFFF;
  13895. * OV = 1;
  13896. * }
  13897. * Rd.W[z] = resQ31[z];
  13898. * ~~~
  13899. *
  13900. * \param [in] a unsigned long type of value stored in a
  13901. * \param [in] b unsigned long type of value stored in b
  13902. * \return value stored in unsigned long type
  13903. */
  13904. __STATIC_FORCEINLINE unsigned long __RV_KDMBT16(unsigned long a, unsigned long b)
  13905. {
  13906. unsigned long result;
  13907. __ASM volatile("kdmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13908. return result;
  13909. }
  13910. /* ===== Inline Function End for 4.8.2. KDMBT16 ===== */
  13911. /* ===== Inline Function Start for 4.8.3. KDMTT16 ===== */
  13912. /**
  13913. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13914. * \brief KDMTT16 (SIMD Signed Saturating Double Multiply T16 x T16)
  13915. * \details
  13916. * **Type**: SIMD (RV64 only)
  13917. *
  13918. * **Syntax**:\n
  13919. * ~~~
  13920. * KDMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13921. * ~~~
  13922. *
  13923. * **Purpose**:\n
  13924. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13925. * of the 32-bit chunks in registers and then double and saturate the Q31 results into the 32-bit chunks
  13926. * in the destination register. If saturation happens, an overflow flag OV will be set.
  13927. *
  13928. * **Description**:\n
  13929. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13930. * or bottom 16-bit Q15 content of the 32-bit portions in Rs2. The Q30 results are then doubled and
  13931. * saturated into Q31 values. The Q31 values are then written into the 32-bit chunks in Rd. When both
  13932. * the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated to 0x7FFFFFFF
  13933. * and the overflow flag OV will be set.
  13934. *
  13935. * **Operations**:\n
  13936. * ~~~
  13937. * // KDMBB16: (x,y,z)=(0,0,0),(2,2,1)
  13938. * // KDMBT16: (x,y,z)=(0,1,0),(2,3,1)
  13939. * // KDMTT16: (x,y,z)=(1,1,0),(3,3,1)
  13940. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13941. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13942. * Mresult[z] = aop[z] * bop[z];
  13943. * resQ31[z] = Mresult[z] << 1;
  13944. * } else {
  13945. * resQ31[z] = 0x7FFFFFFF;
  13946. * OV = 1;
  13947. * }
  13948. * Rd.W[z] = resQ31[z];
  13949. * ~~~
  13950. *
  13951. * \param [in] a unsigned long type of value stored in a
  13952. * \param [in] b unsigned long type of value stored in b
  13953. * \return value stored in unsigned long type
  13954. */
  13955. __STATIC_FORCEINLINE unsigned long __RV_KDMTT16(unsigned long a, unsigned long b)
  13956. {
  13957. unsigned long result;
  13958. __ASM volatile("kdmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  13959. return result;
  13960. }
  13961. /* ===== Inline Function End for 4.8.3. KDMTT16 ===== */
  13962. /* ===== Inline Function Start for 4.9.1. KDMABB16 ===== */
  13963. /**
  13964. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  13965. * \brief KDMABB16 (SIMD Signed Saturating Double Multiply Addition B16 x B16)
  13966. * \details
  13967. * **Type**: SIMD (RV64 only)
  13968. *
  13969. * **Syntax**:\n
  13970. * ~~~
  13971. * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  13972. * ~~~
  13973. *
  13974. * **Purpose**:\n
  13975. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  13976. * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
  13977. * the values of the corresponding 32-bit chunks from the destination register and write the saturated
  13978. * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
  13979. * happens, an overflow flag OV will be set.
  13980. *
  13981. * **Description**:\n
  13982. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  13983. * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
  13984. * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
  13985. * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
  13986. * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
  13987. * are written back to Rd.
  13988. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  13989. * set.
  13990. *
  13991. * **Operations**:\n
  13992. * ~~~
  13993. * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
  13994. * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
  13995. * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
  13996. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  13997. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  13998. * Mresult[z] = aop[z] * bop[z];
  13999. * resQ31[z] = Mresult[z] << 1;
  14000. * } else {
  14001. * resQ31[z] = 0x7FFFFFFF;
  14002. * OV = 1;
  14003. * }
  14004. * resadd[z] = Rd.W[z] + resQ31[z];
  14005. * if (resadd[z] > (2^31)-1) {
  14006. * resadd[z] = (2^31)-1;
  14007. * OV = 1;
  14008. * } else if (resadd[z] < -2^31) {
  14009. * resadd[z] = -2^31;
  14010. * OV = 1;
  14011. * }
  14012. * Rd.W[z] = resadd[z];
  14013. * ~~~
  14014. *
  14015. * \param [in] t unsigned long type of value stored in t
  14016. * \param [in] a unsigned long type of value stored in a
  14017. * \param [in] b unsigned long type of value stored in b
  14018. * \return value stored in unsigned long type
  14019. */
  14020. __STATIC_FORCEINLINE unsigned long __RV_KDMABB16(unsigned long t, unsigned long a, unsigned long b)
  14021. {
  14022. __ASM volatile("kdmabb16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14023. return t;
  14024. }
  14025. /* ===== Inline Function End for 4.9.1. KDMABB16 ===== */
  14026. /* ===== Inline Function Start for 4.9.2. KDMABT16 ===== */
  14027. /**
  14028. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14029. * \brief KDMABT16 (SIMD Signed Saturating Double Multiply Addition B16 x T16)
  14030. * \details
  14031. * **Type**: SIMD (RV64 only)
  14032. *
  14033. * **Syntax**:\n
  14034. * ~~~
  14035. * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14036. * ~~~
  14037. *
  14038. * **Purpose**:\n
  14039. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14040. * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
  14041. * the values of the corresponding 32-bit chunks from the destination register and write the saturated
  14042. * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
  14043. * happens, an overflow flag OV will be set.
  14044. *
  14045. * **Description**:\n
  14046. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14047. * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
  14048. * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
  14049. * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
  14050. * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
  14051. * are written back to Rd.
  14052. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  14053. * set.
  14054. *
  14055. * **Operations**:\n
  14056. * ~~~
  14057. * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
  14058. * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
  14059. * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
  14060. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  14061. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  14062. * Mresult[z] = aop[z] * bop[z];
  14063. * resQ31[z] = Mresult[z] << 1;
  14064. * } else {
  14065. * resQ31[z] = 0x7FFFFFFF;
  14066. * OV = 1;
  14067. * }
  14068. * resadd[z] = Rd.W[z] + resQ31[z];
  14069. * if (resadd[z] > (2^31)-1) {
  14070. * resadd[z] = (2^31)-1;
  14071. * OV = 1;
  14072. * } else if (resadd[z] < -2^31) {
  14073. * resadd[z] = -2^31;
  14074. * OV = 1;
  14075. * }
  14076. * Rd.W[z] = resadd[z];
  14077. * ~~~
  14078. *
  14079. * \param [in] t unsigned long type of value stored in t
  14080. * \param [in] a unsigned long type of value stored in a
  14081. * \param [in] b unsigned long type of value stored in b
  14082. * \return value stored in unsigned long type
  14083. */
  14084. __STATIC_FORCEINLINE unsigned long __RV_KDMABT16(unsigned long t, unsigned long a, unsigned long b)
  14085. {
  14086. __ASM volatile("kdmabt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14087. return t;
  14088. }
  14089. /* ===== Inline Function End for 4.9.2. KDMABT16 ===== */
  14090. /* ===== Inline Function Start for 4.9.3. KDMATT16 ===== */
  14091. /**
  14092. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14093. * \brief KDMATT16 (SIMD Signed Saturating Double Multiply Addition T16 x T16)
  14094. * \details
  14095. * **Type**: SIMD (RV64 only)
  14096. *
  14097. * **Syntax**:\n
  14098. * ~~~
  14099. * KDMAxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14100. * ~~~
  14101. *
  14102. * **Purpose**:\n
  14103. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14104. * of the 32-bit chunks in registers and then double and saturate the Q31 results, add the results with
  14105. * the values of the corresponding 32-bit chunks from the destination register and write the saturated
  14106. * addition results back into the corresponding 32-bit chunks of the destination register. If saturation
  14107. * happens, an overflow flag OV will be set.
  14108. *
  14109. * **Description**:\n
  14110. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14111. * or bottom 16-bit Q15 content of the corresponding 32-bit portions in Rs2. The Q30 results are then
  14112. * doubled and saturated into Q31 values. The Q31 values are then added with the content of the
  14113. * corresponding 32-bit portions of Rd. If the addition results are beyond the Q31 number range (-2^31 <=
  14114. * Q31 <= 2^31-1), they are saturated to the range and the OV flag is set to 1. The results after saturation
  14115. * are written back to Rd.
  14116. * When both the two Q15 inputs are 0x8000, saturation will happen and the overflow flag OV will be
  14117. * set.
  14118. *
  14119. * **Operations**:\n
  14120. * ~~~
  14121. * // KDMABB16: (x,y,z)=(0,0,0),(2,2,1)
  14122. * // KDMABT16: (x,y,z)=(0,1,0),(2,3,1)
  14123. * // KDMATT16: (x,y,z)=(1,1,0),(3,3,1)
  14124. * aop[z] = Rs1.H[x]; bop[z] = Rs2.H[y];
  14125. * If (0x8000 != aop[z] | 0x8000 != bop[z]) {
  14126. * Mresult[z] = aop[z] * bop[z];
  14127. * resQ31[z] = Mresult[z] << 1;
  14128. * } else {
  14129. * resQ31[z] = 0x7FFFFFFF;
  14130. * OV = 1;
  14131. * }
  14132. * resadd[z] = Rd.W[z] + resQ31[z];
  14133. * if (resadd[z] > (2^31)-1) {
  14134. * resadd[z] = (2^31)-1;
  14135. * OV = 1;
  14136. * } else if (resadd[z] < -2^31) {
  14137. * resadd[z] = -2^31;
  14138. * OV = 1;
  14139. * }
  14140. * Rd.W[z] = resadd[z];
  14141. * ~~~
  14142. *
  14143. * \param [in] t unsigned long type of value stored in t
  14144. * \param [in] a unsigned long type of value stored in a
  14145. * \param [in] b unsigned long type of value stored in b
  14146. * \return value stored in unsigned long type
  14147. */
  14148. __STATIC_FORCEINLINE unsigned long __RV_KDMATT16(unsigned long t, unsigned long a, unsigned long b)
  14149. {
  14150. __ASM volatile("kdmatt16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14151. return t;
  14152. }
  14153. /* ===== Inline Function End for 4.9.3. KDMATT16 ===== */
  14154. /* ===== Inline Function Start for 4.10.1. KHMBB16 ===== */
  14155. /**
  14156. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14157. * \brief KHMBB16 (SIMD Signed Saturating Half Multiply B16 x B16)
  14158. * \details
  14159. * **Type**: SIMD (RV64 Only)
  14160. *
  14161. * **Syntax**:\n
  14162. * ~~~
  14163. * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14164. * ~~~
  14165. *
  14166. * **Purpose**:\n
  14167. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14168. * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
  14169. * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
  14170. * overflow flag OV will be set.
  14171. *
  14172. * **Description**:\n
  14173. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14174. * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
  14175. * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
  14176. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  14177. * to 0x7FFF and the overflow flag OV will be set.
  14178. *
  14179. * **Operations**:\n
  14180. * ~~~
  14181. * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
  14182. * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
  14183. * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
  14184. * aop = Rs1.H[x]; bop = Rs2.H[y];
  14185. * If (0x8000 != aop | 0x8000 != bop) {
  14186. * Mresult[31:0] = aop * bop;
  14187. * res[15:0] = Mresult[30:15];
  14188. * } else {
  14189. * res[15:0] = 0x7FFF;
  14190. * OV = 1;
  14191. * }
  14192. * Rd.W[z] = SE32(res[15:0]);
  14193. * ~~~
  14194. *
  14195. * \param [in] a unsigned long type of value stored in a
  14196. * \param [in] b unsigned long type of value stored in b
  14197. * \return value stored in unsigned long type
  14198. */
  14199. __STATIC_FORCEINLINE unsigned long __RV_KHMBB16(unsigned long a, unsigned long b)
  14200. {
  14201. unsigned long result;
  14202. __ASM volatile("khmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14203. return result;
  14204. }
  14205. /* ===== Inline Function End for 4.10.1. KHMBB16 ===== */
  14206. /* ===== Inline Function Start for 4.10.2. KHMBT16 ===== */
  14207. /**
  14208. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14209. * \brief KHMBT16 (SIMD Signed Saturating Half Multiply B16 x T16)
  14210. * \details
  14211. * **Type**: SIMD (RV64 Only)
  14212. *
  14213. * **Syntax**:\n
  14214. * ~~~
  14215. * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14216. * ~~~
  14217. *
  14218. * **Purpose**:\n
  14219. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14220. * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
  14221. * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
  14222. * overflow flag OV will be set.
  14223. *
  14224. * **Description**:\n
  14225. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14226. * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
  14227. * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
  14228. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  14229. * to 0x7FFF and the overflow flag OV will be set.
  14230. *
  14231. * **Operations**:\n
  14232. * ~~~
  14233. * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
  14234. * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
  14235. * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
  14236. * aop = Rs1.H[x]; bop = Rs2.H[y];
  14237. * If (0x8000 != aop | 0x8000 != bop) {
  14238. * Mresult[31:0] = aop * bop;
  14239. * res[15:0] = Mresult[30:15];
  14240. * } else {
  14241. * res[15:0] = 0x7FFF;
  14242. * OV = 1;
  14243. * }
  14244. * Rd.W[z] = SE32(res[15:0]);
  14245. * ~~~
  14246. *
  14247. * \param [in] a unsigned long type of value stored in a
  14248. * \param [in] b unsigned long type of value stored in b
  14249. * \return value stored in unsigned long type
  14250. */
  14251. __STATIC_FORCEINLINE unsigned long __RV_KHMBT16(unsigned long a, unsigned long b)
  14252. {
  14253. unsigned long result;
  14254. __ASM volatile("khmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14255. return result;
  14256. }
  14257. /* ===== Inline Function End for 4.10.2. KHMBT16 ===== */
  14258. /* ===== Inline Function Start for 4.10.3. KHMTT16 ===== */
  14259. /**
  14260. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_Q15_SAT_MULT
  14261. * \brief KHMTT16 (SIMD Signed Saturating Half Multiply T16 x T16)
  14262. * \details
  14263. * **Type**: SIMD (RV64 Only)
  14264. *
  14265. * **Syntax**:\n
  14266. * ~~~
  14267. * KHMxy16 Rd, Rs1, Rs2 (xy = BB, BT, TT)
  14268. * ~~~
  14269. *
  14270. * **Purpose**:\n
  14271. * Multiply the signed Q15 integer contents of two 16-bit data in the corresponding portion
  14272. * of the 32-bit chunks in registers and then right-shift 15 bits to turn the Q30 results into Q15
  14273. * numbers again and saturate the Q15 results into the destination register. If saturation happens, an
  14274. * overflow flag OV will be set.
  14275. *
  14276. * **Description**:\n
  14277. * Multiply the top or bottom 16-bit Q15 content of the 32-bit portions in Rs1 with the top
  14278. * or bottom 16-bit Q15 content of the 32-bit portion in Rs2. The Q30 results are then right-shifted 15-
  14279. * bits and saturated into Q15 values. The 32-bit Q15 values are then written into the 32-bit chunks in
  14280. * Rd. When both the two Q15 inputs are 0x8000, saturation will happen. The result will be saturated
  14281. * to 0x7FFF and the overflow flag OV will be set.
  14282. *
  14283. * **Operations**:\n
  14284. * ~~~
  14285. * // KHMBB16: (x,y,z)=(0,0,0),(2,2,1)
  14286. * // KHMBT16: (x,y,z)=(0,1,0),(2,3,1)
  14287. * // KHMTT16: (x,y,z)=(1,1,0),(3,3,1)
  14288. * aop = Rs1.H[x]; bop = Rs2.H[y];
  14289. * If (0x8000 != aop | 0x8000 != bop) {
  14290. * Mresult[31:0] = aop * bop;
  14291. * res[15:0] = Mresult[30:15];
  14292. * } else {
  14293. * res[15:0] = 0x7FFF;
  14294. * OV = 1;
  14295. * }
  14296. * Rd.W[z] = SE32(res[15:0]);
  14297. * ~~~
  14298. *
  14299. * \param [in] a unsigned long type of value stored in a
  14300. * \param [in] b unsigned long type of value stored in b
  14301. * \return value stored in unsigned long type
  14302. */
  14303. __STATIC_FORCEINLINE unsigned long __RV_KHMTT16(unsigned long a, unsigned long b)
  14304. {
  14305. unsigned long result;
  14306. __ASM volatile("khmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14307. return result;
  14308. }
  14309. /* ===== Inline Function End for 4.10.3. KHMTT16 ===== */
  14310. /* ===== Inline Function Start for 4.11.1. KMABB32 ===== */
  14311. /**
  14312. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
  14313. * \brief KMABB32 (Saturating Signed Multiply Bottom Words & Add)
  14314. * \details
  14315. * **Type**: DSP (RV64 Only)
  14316. *
  14317. * **Syntax**:\n
  14318. * ~~~
  14319. * KMABB32 Rd, Rs1, Rs2
  14320. * KMABT32 Rd, Rs1, Rs2
  14321. * KMATT32 Rd, Rs1, Rs2
  14322. * ~~~
  14323. *
  14324. * **Purpose**:\n
  14325. * Multiply the signed 32-bit element in a register with the 32-bit element in another register
  14326. * and add the result to the content of 64-bit data in the third register. The addition result may be
  14327. * saturated and is written to the third register.
  14328. * * KMABB32: rd + bottom*bottom
  14329. * * KMABT32: rd + bottom*top
  14330. * * KMATT32: rd + top*top
  14331. *
  14332. * **Description**:\n
  14333. * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14334. * element in Rs2.
  14335. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14336. * element in Rs2.
  14337. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14338. * element in Rs2.
  14339. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
  14340. * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
  14341. * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  14342. * integers.
  14343. *
  14344. * **Operations**:\n
  14345. * ~~~
  14346. * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
  14347. * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
  14348. * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
  14349. * if (res > (2^63)-1) {
  14350. * res = (2^63)-1;
  14351. * OV = 1;
  14352. * } else if (res < -2^63) {
  14353. * res = -2^63;
  14354. * OV = 1;
  14355. * }
  14356. * Rd = res;
  14357. * *Exceptions:* None
  14358. * ~~~
  14359. *
  14360. * \param [in] t long type of value stored in t
  14361. * \param [in] a unsigned long type of value stored in a
  14362. * \param [in] b unsigned long type of value stored in b
  14363. * \return value stored in long type
  14364. */
  14365. __STATIC_FORCEINLINE long __RV_KMABB32(long t, unsigned long a, unsigned long b)
  14366. {
  14367. __ASM volatile("kmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14368. return t;
  14369. }
  14370. /* ===== Inline Function End for 4.11.1. KMABB32 ===== */
  14371. /* ===== Inline Function Start for 4.11.2. KMABT32 ===== */
  14372. /**
  14373. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
  14374. * \brief KMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
  14375. * \details
  14376. * **Type**: DSP (RV64 Only)
  14377. *
  14378. * **Syntax**:\n
  14379. * ~~~
  14380. * KMABB32 Rd, Rs1, Rs2
  14381. * KMABT32 Rd, Rs1, Rs2
  14382. * KMATT32 Rd, Rs1, Rs2
  14383. * ~~~
  14384. *
  14385. * **Purpose**:\n
  14386. * Multiply the signed 32-bit element in a register with the 32-bit element in another register
  14387. * and add the result to the content of 64-bit data in the third register. The addition result may be
  14388. * saturated and is written to the third register.
  14389. * * KMABB32: rd + bottom*bottom
  14390. * * KMABT32: rd + bottom*top
  14391. * * KMATT32: rd + top*top
  14392. *
  14393. * **Description**:\n
  14394. * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14395. * element in Rs2.
  14396. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14397. * element in Rs2.
  14398. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14399. * element in Rs2.
  14400. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
  14401. * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
  14402. * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  14403. * integers.
  14404. *
  14405. * **Operations**:\n
  14406. * ~~~
  14407. * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
  14408. * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
  14409. * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
  14410. * if (res > (2^63)-1) {
  14411. * res = (2^63)-1;
  14412. * OV = 1;
  14413. * } else if (res < -2^63) {
  14414. * res = -2^63;
  14415. * OV = 1;
  14416. * }
  14417. * Rd = res;
  14418. * *Exceptions:* None
  14419. * ~~~
  14420. *
  14421. * \param [in] t long type of value stored in t
  14422. * \param [in] a unsigned long type of value stored in a
  14423. * \param [in] b unsigned long type of value stored in b
  14424. * \return value stored in long type
  14425. */
  14426. __STATIC_FORCEINLINE long __RV_KMABT32(long t, unsigned long a, unsigned long b)
  14427. {
  14428. __ASM volatile("kmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14429. return t;
  14430. }
  14431. /* ===== Inline Function End for 4.11.2. KMABT32 ===== */
  14432. /* ===== Inline Function Start for 4.11.3. KMATT32 ===== */
  14433. /**
  14434. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT_ADD
  14435. * \brief KMATT32 (Saturating Signed Multiply Top Words & Add)
  14436. * \details
  14437. * **Type**: DSP (RV64 Only)
  14438. *
  14439. * **Syntax**:\n
  14440. * ~~~
  14441. * KMABB32 Rd, Rs1, Rs2
  14442. * KMABT32 Rd, Rs1, Rs2
  14443. * KMATT32 Rd, Rs1, Rs2
  14444. * ~~~
  14445. *
  14446. * **Purpose**:\n
  14447. * Multiply the signed 32-bit element in a register with the 32-bit element in another register
  14448. * and add the result to the content of 64-bit data in the third register. The addition result may be
  14449. * saturated and is written to the third register.
  14450. * * KMABB32: rd + bottom*bottom
  14451. * * KMABT32: rd + bottom*top
  14452. * * KMATT32: rd + top*top
  14453. *
  14454. * **Description**:\n
  14455. * For the `KMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14456. * element in Rs2.
  14457. * For the `KMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14458. * element in Rs2.
  14459. * For the `KMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14460. * element in Rs2.
  14461. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond
  14462. * the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The
  14463. * result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  14464. * integers.
  14465. *
  14466. * **Operations**:\n
  14467. * ~~~
  14468. * res = Rd + (Rs1.W[0] * Rs2.W[0]); // KMABB32
  14469. * res = Rd + (Rs1.W[0] * Rs2.W[1]); // KMABT32
  14470. * res = Rd + (Rs1.W[1] * Rs2.W[1]); // KMATT32
  14471. * if (res > (2^63)-1) {
  14472. * res = (2^63)-1;
  14473. * OV = 1;
  14474. * } else if (res < -2^63) {
  14475. * res = -2^63;
  14476. * OV = 1;
  14477. * }
  14478. * Rd = res;
  14479. * *Exceptions:* None
  14480. * ~~~
  14481. *
  14482. * \param [in] t long type of value stored in t
  14483. * \param [in] a unsigned long type of value stored in a
  14484. * \param [in] b unsigned long type of value stored in b
  14485. * \return value stored in long type
  14486. */
  14487. __STATIC_FORCEINLINE long __RV_KMATT32(long t, unsigned long a, unsigned long b)
  14488. {
  14489. __ASM volatile("kmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14490. return t;
  14491. }
  14492. /* ===== Inline Function End for 4.11.3. KMATT32 ===== */
  14493. /* ===== Inline Function Start for 4.12.1. KMADA32 ===== */
  14494. /**
  14495. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14496. * \brief KMADA32 (Saturating Signed Multiply Two Words and Two Adds)
  14497. * \details
  14498. * **Type**: DSP (RV64 Only)
  14499. *
  14500. * **Syntax**:\n
  14501. * ~~~
  14502. * KMADA32 Rd, Rs1, Rs2
  14503. * KMAXDA32 Rd, Rs1, Rs2
  14504. * ~~~
  14505. *
  14506. * **Purpose**:\n
  14507. * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
  14508. * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
  14509. * * KMADA32: rd + top*top + bottom*bottom
  14510. * * KMAXDA32: rd + top*bottom + bottom*top
  14511. *
  14512. * **Description**:\n
  14513. * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
  14514. * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
  14515. * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
  14516. * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
  14517. * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
  14518. * with the top 32-bit element in Rs2.
  14519. * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
  14520. * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
  14521. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  14522. *
  14523. * **Operations**:\n
  14524. * ~~~
  14525. * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
  14526. * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
  14527. * if (res > (2^63)-1) {
  14528. * res = (2^63)-1;
  14529. * OV = 1;
  14530. * } else if (res < -2^63) {
  14531. * res = -2^63;
  14532. * OV = 1;
  14533. * }
  14534. * Rd = res;
  14535. * ~~~
  14536. *
  14537. * \param [in] t long type of value stored in t
  14538. * \param [in] a unsigned long type of value stored in a
  14539. * \param [in] b unsigned long type of value stored in b
  14540. * \return value stored in long type
  14541. */
  14542. __STATIC_FORCEINLINE long __RV_KMADA32(long t, unsigned long a, unsigned long b)
  14543. {
  14544. __ASM volatile("kmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14545. return t;
  14546. }
  14547. /* ===== Inline Function End for 4.12.1. KMADA32 ===== */
  14548. /* ===== Inline Function Start for 4.12.2. KMAXDA32 ===== */
  14549. /**
  14550. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14551. * \brief KMAXDA32 (Saturating Signed Crossed Multiply Two Words and Two Adds)
  14552. * \details
  14553. * **Type**: DSP (RV64 Only)
  14554. *
  14555. * **Syntax**:\n
  14556. * ~~~
  14557. * KMADA32 Rd, Rs1, Rs2
  14558. * KMAXDA32 Rd, Rs1, Rs2
  14559. * ~~~
  14560. *
  14561. * **Purpose**:\n
  14562. * Do two signed 32-bit multiplications from 32-bit data in two registers; and then adds the
  14563. * two 64-bit results and 64-bit data in a third register together. The addition result may be saturated.
  14564. * * KMADA32: rd + top*top + bottom*bottom
  14565. * * KMAXDA32: rd + top*bottom + bottom*top
  14566. *
  14567. * **Description**:\n
  14568. * For the `KMADA32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-
  14569. * bit element in Rs2 and then adds the result to the result of multiplying the top 32-bit element in Rs1
  14570. * with the top 32-bit element in Rs2. It is actually an alias of the `KMAR64` instruction.
  14571. * For the `KMAXDA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit
  14572. * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
  14573. * with the top 32-bit element in Rs2.
  14574. * The result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63
  14575. * number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The 64-bit
  14576. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  14577. *
  14578. * **Operations**:\n
  14579. * ~~~
  14580. * res = Rd + (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMADA32
  14581. * res = Rd + (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMAXDA32
  14582. * if (res > (2^63)-1) {
  14583. * res = (2^63)-1;
  14584. * OV = 1;
  14585. * } else if (res < -2^63) {
  14586. * res = -2^63;
  14587. * OV = 1;
  14588. * }
  14589. * Rd = res;
  14590. * ~~~
  14591. *
  14592. * \param [in] t long type of value stored in t
  14593. * \param [in] a unsigned long type of value stored in a
  14594. * \param [in] b unsigned long type of value stored in b
  14595. * \return value stored in long type
  14596. */
  14597. __STATIC_FORCEINLINE long __RV_KMAXDA32(long t, unsigned long a, unsigned long b)
  14598. {
  14599. __ASM volatile("kmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14600. return t;
  14601. }
  14602. /* ===== Inline Function End for 4.12.2. KMAXDA32 ===== */
  14603. /* ===== Inline Function Start for 4.13.1. KMDA32 ===== */
  14604. /**
  14605. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14606. * \brief KMDA32 (Signed Multiply Two Words and Add)
  14607. * \details
  14608. * **Type**: DSP (RV64 Only)
  14609. *
  14610. * **Syntax**:\n
  14611. * ~~~
  14612. * KMDA32 Rd, Rs1, Rs2
  14613. * KMXDA32 Rd, Rs1, Rs2
  14614. * ~~~
  14615. *
  14616. * **Purpose**:\n
  14617. * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
  14618. * adds the two 64-bit results together. The addition result may be saturated.
  14619. * * KMDA32: top*top + bottom*bottom
  14620. * * KMXDA32: top*bottom + bottom*top
  14621. *
  14622. * **Description**:\n
  14623. * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14624. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14625. * with the top 32-bit element of Rs2.
  14626. * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14627. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14628. * with the bottom 32-bit element of Rs2.
  14629. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
  14630. * The final result is written to Rd. The 32-bit contents are treated as signed integers.
  14631. *
  14632. * **Operations**:\n
  14633. * ~~~
  14634. * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
  14635. * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
  14636. * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
  14637. * } else {
  14638. * Rd = 0x7fffffffffffffff;
  14639. * OV = 1;
  14640. * }
  14641. * ~~~
  14642. *
  14643. * \param [in] a unsigned long type of value stored in a
  14644. * \param [in] b unsigned long type of value stored in b
  14645. * \return value stored in long type
  14646. */
  14647. __STATIC_FORCEINLINE long __RV_KMDA32(unsigned long a, unsigned long b)
  14648. {
  14649. long result;
  14650. __ASM volatile("kmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14651. return result;
  14652. }
  14653. /* ===== Inline Function End for 4.13.1. KMDA32 ===== */
  14654. /* ===== Inline Function Start for 4.13.2. KMXDA32 ===== */
  14655. /**
  14656. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14657. * \brief KMXDA32 (Signed Crossed Multiply Two Words and Add)
  14658. * \details
  14659. * **Type**: DSP (RV64 Only)
  14660. *
  14661. * **Syntax**:\n
  14662. * ~~~
  14663. * KMDA32 Rd, Rs1, Rs2
  14664. * KMXDA32 Rd, Rs1, Rs2
  14665. * ~~~
  14666. *
  14667. * **Purpose**:\n
  14668. * Do two signed 32-bit multiplications from the 32-bit element of two registers; and then
  14669. * adds the two 64-bit results together. The addition result may be saturated.
  14670. * * KMDA32: top*top + bottom*bottom
  14671. * * KMXDA32: top*bottom + bottom*top
  14672. *
  14673. * **Description**:\n
  14674. * For the `KMDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14675. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14676. * with the top 32-bit element of Rs2.
  14677. * For the `KMXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14678. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  14679. * with the bottom 32-bit element of Rs2.
  14680. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^63-1.
  14681. * The final result is written to Rd. The 32-bit contents are treated as signed integers.
  14682. *
  14683. * **Operations**:\n
  14684. * ~~~
  14685. * if ((Rs1 != 0x8000000080000000) or (Rs2 != 0x8000000080000000)) {
  14686. * Rd = (Rs1.W[1] * Rs2.W[1]) + (Rs1.W[0] * Rs2.W[0]); // KMDA32
  14687. * Rd = (Rs1.W[1] * Rs2.W[0]) + (Rs1.W[0] * Rs2.W[1]); // KMXDA32
  14688. * } else {
  14689. * Rd = 0x7fffffffffffffff;
  14690. * OV = 1;
  14691. * }
  14692. * ~~~
  14693. *
  14694. * \param [in] a unsigned long type of value stored in a
  14695. * \param [in] b unsigned long type of value stored in b
  14696. * \return value stored in long type
  14697. */
  14698. __STATIC_FORCEINLINE long __RV_KMXDA32(unsigned long a, unsigned long b)
  14699. {
  14700. long result;
  14701. __ASM volatile("kmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  14702. return result;
  14703. }
  14704. /* ===== Inline Function End for 4.13.2. KMXDA32 ===== */
  14705. /* ===== Inline Function Start for 4.14.1. KMADS32 ===== */
  14706. /**
  14707. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14708. * \brief KMADS32 (Saturating Signed Multiply Two Words & Subtract & Add)
  14709. * \details
  14710. * **Type**: DSP (RV64 Only)
  14711. *
  14712. * **Syntax**:\n
  14713. * ~~~
  14714. * KMADS32 Rd, Rs1, Rs2
  14715. * KMADRS32 Rd, Rs1, Rs2
  14716. * KMAXDS32 Rd, Rs1, Rs2
  14717. * ~~~
  14718. *
  14719. * **Purpose**:\n
  14720. * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
  14721. * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
  14722. * 64-bit data in a third register. The addition result may be saturated.
  14723. * * KMADS32: rd + (top*top - bottom*bottom)
  14724. * * KMADRS32: rd + (bottom*bottom - top*top)
  14725. * * KMAXDS32: rd + (top*bottom - bottom*top)
  14726. *
  14727. * **Description**:\n
  14728. * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14729. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14730. * Rs1 with the top 32-bit element in Rs2.
  14731. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14732. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  14733. * element in Rs1 with the bottom 32-bit element in Rs2.
  14734. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14735. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14736. * Rs1 with the bottom 32-bit element in Rs2.
  14737. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
  14738. * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
  14739. * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
  14740. * as signed integers.
  14741. *
  14742. * **Operations**:\n
  14743. * ~~~
  14744. * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
  14745. * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
  14746. * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
  14747. * if (res > (2^63)-1) {
  14748. * res = (2^63)-1;
  14749. * OV = 1;
  14750. * } else if (res < -2^63) {
  14751. * res = -2^63;
  14752. * OV = 1;
  14753. * }
  14754. * Rd = res;
  14755. * ~~~
  14756. *
  14757. * \param [in] t long type of value stored in t
  14758. * \param [in] a unsigned long type of value stored in a
  14759. * \param [in] b unsigned long type of value stored in b
  14760. * \return value stored in long type
  14761. */
  14762. __STATIC_FORCEINLINE long __RV_KMADS32(long t, unsigned long a, unsigned long b)
  14763. {
  14764. __ASM volatile("kmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14765. return t;
  14766. }
  14767. /* ===== Inline Function End for 4.14.1. KMADS32 ===== */
  14768. /* ===== Inline Function Start for 4.14.2. KMADRS32 ===== */
  14769. /**
  14770. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14771. * \brief KMADRS32 (Saturating Signed Multiply Two Words & Reverse Subtract & Add)
  14772. * \details
  14773. * **Type**: DSP (RV64 Only)
  14774. *
  14775. * **Syntax**:\n
  14776. * ~~~
  14777. * KMADS32 Rd, Rs1, Rs2
  14778. * KMADRS32 Rd, Rs1, Rs2
  14779. * KMAXDS32 Rd, Rs1, Rs2
  14780. * ~~~
  14781. *
  14782. * **Purpose**:\n
  14783. * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
  14784. * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
  14785. * 64-bit data in a third register. The addition result may be saturated.
  14786. * * KMADS32: rd + (top*top - bottom*bottom)
  14787. * * KMADRS32: rd + (bottom*bottom - top*top)
  14788. * * KMAXDS32: rd + (top*bottom - bottom*top)
  14789. *
  14790. * **Description**:\n
  14791. * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14792. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14793. * Rs1 with the top 32-bit element in Rs2.
  14794. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14795. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  14796. * element in Rs1 with the bottom 32-bit element in Rs2.
  14797. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14798. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14799. * Rs1 with the bottom 32-bit element in Rs2.
  14800. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
  14801. * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
  14802. * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
  14803. * as signed integers.
  14804. *
  14805. * **Operations**:\n
  14806. * ~~~
  14807. * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
  14808. * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
  14809. * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
  14810. * if (res > (2^63)-1) {
  14811. * res = (2^63)-1;
  14812. * OV = 1;
  14813. * } else if (res < -2^63) {
  14814. * res = -2^63;
  14815. * OV = 1;
  14816. * }
  14817. * Rd = res;
  14818. * ~~~
  14819. *
  14820. * \param [in] t long type of value stored in t
  14821. * \param [in] a unsigned long type of value stored in a
  14822. * \param [in] b unsigned long type of value stored in b
  14823. * \return value stored in long type
  14824. */
  14825. __STATIC_FORCEINLINE long __RV_KMADRS32(long t, unsigned long a, unsigned long b)
  14826. {
  14827. __ASM volatile("kmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14828. return t;
  14829. }
  14830. /* ===== Inline Function End for 4.14.2. KMADRS32 ===== */
  14831. /* ===== Inline Function Start for 4.14.3. KMAXDS32 ===== */
  14832. /**
  14833. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14834. * \brief KMAXDS32 (Saturating Signed Crossed Multiply Two Words & Subtract & Add)
  14835. * \details
  14836. * **Type**: DSP (RV64 Only)
  14837. *
  14838. * **Syntax**:\n
  14839. * ~~~
  14840. * KMADS32 Rd, Rs1, Rs2
  14841. * KMADRS32 Rd, Rs1, Rs2
  14842. * KMAXDS32 Rd, Rs1, Rs2
  14843. * ~~~
  14844. *
  14845. * **Purpose**:\n
  14846. * Do two signed 32-bit multiplications from 32-bit elements in two registers; and then
  14847. * perform a subtraction operation between the two 64-bit results. Then add the subtraction result to
  14848. * 64-bit data in a third register. The addition result may be saturated.
  14849. * * KMADS32: rd + (top*top - bottom*bottom)
  14850. * * KMADRS32: rd + (bottom*bottom - top*top)
  14851. * * KMAXDS32: rd + (top*bottom - bottom*top)
  14852. *
  14853. * **Description**:\n
  14854. * For the `KMADS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  14855. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14856. * Rs1 with the top 32-bit element in Rs2.
  14857. * For the `KMADRS32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit
  14858. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  14859. * element in Rs1 with the bottom 32-bit element in Rs2.
  14860. * For the `KMAXDS32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  14861. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  14862. * Rs1 with the bottom 32-bit element in Rs2.
  14863. * The subtraction result is then added to the content of 64-bit data in Rd. If the addition result is
  14864. * beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to
  14865. * 1. The 64-bit result after saturation is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated
  14866. * as signed integers.
  14867. *
  14868. * **Operations**:\n
  14869. * ~~~
  14870. * res = Rd + (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMADS32
  14871. * res = Rd + (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // KMADRS32
  14872. * res = Rd + (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMAXDS32
  14873. * if (res > (2^63)-1) {
  14874. * res = (2^63)-1;
  14875. * OV = 1;
  14876. * } else if (res < -2^63) {
  14877. * res = -2^63;
  14878. * OV = 1;
  14879. * }
  14880. * Rd = res;
  14881. * ~~~
  14882. *
  14883. * \param [in] t long type of value stored in t
  14884. * \param [in] a unsigned long type of value stored in a
  14885. * \param [in] b unsigned long type of value stored in b
  14886. * \return value stored in long type
  14887. */
  14888. __STATIC_FORCEINLINE long __RV_KMAXDS32(long t, unsigned long a, unsigned long b)
  14889. {
  14890. __ASM volatile("kmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14891. return t;
  14892. }
  14893. /* ===== Inline Function End for 4.14.3. KMAXDS32 ===== */
  14894. /* ===== Inline Function Start for 4.15.1. KMSDA32 ===== */
  14895. /**
  14896. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14897. * \brief KMSDA32 (Saturating Signed Multiply Two Words & Add & Subtract)
  14898. * \details
  14899. * **Type**: DSP (RV64 Only)
  14900. *
  14901. * **Syntax**:\n
  14902. * ~~~
  14903. * KMSDA32 Rd, Rs1, Rs2
  14904. * KMSXDA32 Rd, Rs1, Rs2
  14905. * ~~~
  14906. *
  14907. * **Purpose**:\n
  14908. * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and then
  14909. * subtract the two 64-bit results from a third register. The subtraction result may be saturated.
  14910. * * KMSDA32: rd - top*top - bottom*bottom
  14911. * * KMSXDA32: rd - top*bottom - bottom*top
  14912. *
  14913. * **Description**:\n
  14914. * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14915. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
  14916. * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14917. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
  14918. * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
  14919. * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
  14920. * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
  14921. * integers.
  14922. *
  14923. * **Operations**:\n
  14924. * ~~~
  14925. * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
  14926. * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
  14927. * if (res > (2^63)-1) {
  14928. * res = (2^63)-1;
  14929. * OV = 1;
  14930. * } else if (res < -2^63) {
  14931. * res = -2^63;
  14932. * OV = 1;
  14933. * }
  14934. * Rd = res;
  14935. * ~~~
  14936. *
  14937. * \param [in] t long type of value stored in t
  14938. * \param [in] a unsigned long type of value stored in a
  14939. * \param [in] b unsigned long type of value stored in b
  14940. * \return value stored in long type
  14941. */
  14942. __STATIC_FORCEINLINE long __RV_KMSDA32(long t, unsigned long a, unsigned long b)
  14943. {
  14944. __ASM volatile("kmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14945. return t;
  14946. }
  14947. /* ===== Inline Function End for 4.15.1. KMSDA32 ===== */
  14948. /* ===== Inline Function Start for 4.15.2. KMSXDA32 ===== */
  14949. /**
  14950. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  14951. * \brief KMSXDA32 (Saturating Signed Crossed Multiply Two Words & Add & Subtract)
  14952. * \details
  14953. * **Type**: DSP (RV64 Only)
  14954. *
  14955. * **Syntax**:\n
  14956. * ~~~
  14957. * KMSDA32 Rd, Rs1, Rs2
  14958. * KMSXDA32 Rd, Rs1, Rs2
  14959. * ~~~
  14960. *
  14961. * **Purpose**:\n
  14962. * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and then
  14963. * subtract the two 64-bit results from a third register. The subtraction result may be saturated.
  14964. * * KMSDA32: rd - top*top - bottom*bottom
  14965. * * KMSXDA32: rd - top*bottom - bottom*top
  14966. *
  14967. * **Description**:\n
  14968. * For the `KMSDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  14969. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
  14970. * For the `KMSXDA32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  14971. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
  14972. * The two 64-bit multiplication results are then subtracted from the content of Rd. If the subtraction
  14973. * result is beyond the Q63 number range (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit
  14974. * is set to 1. The result after saturation is written to Rd. The 32-bit contents are treated as signed
  14975. * integers.
  14976. *
  14977. * **Operations**:\n
  14978. * ~~~
  14979. * res = Rd - (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // KMSDA32
  14980. * res = Rd - (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // KMSXDA32
  14981. * if (res > (2^63)-1) {
  14982. * res = (2^63)-1;
  14983. * OV = 1;
  14984. * } else if (res < -2^63) {
  14985. * res = -2^63;
  14986. * OV = 1;
  14987. * }
  14988. * Rd = res;
  14989. * ~~~
  14990. *
  14991. * \param [in] t long type of value stored in t
  14992. * \param [in] a unsigned long type of value stored in a
  14993. * \param [in] b unsigned long type of value stored in b
  14994. * \return value stored in long type
  14995. */
  14996. __STATIC_FORCEINLINE long __RV_KMSXDA32(long t, unsigned long a, unsigned long b)
  14997. {
  14998. __ASM volatile("kmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  14999. return t;
  15000. }
  15001. /* ===== Inline Function End for 4.15.2. KMSXDA32 ===== */
  15002. /* ===== Inline Function Start for 4.16. KSLL32 ===== */
  15003. /**
  15004. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15005. * \brief KSLL32 (SIMD 32-bit Saturating Shift Left Logical)
  15006. * \details
  15007. * **Type**: SIMD (RV64 Only)
  15008. *
  15009. * **Syntax**:\n
  15010. * ~~~
  15011. * KSLL32 Rd, Rs1, Rs2
  15012. * ~~~
  15013. *
  15014. * **Purpose**:\n
  15015. * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
  15016. * amount is a variable from a GPR.
  15017. *
  15018. * **Description**:\n
  15019. * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  15020. * with zero and the shift amount is specified by the low-order 5-bits of the value in the Rs2 register.
  15021. * Any shifted value greater than 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is
  15022. * saturated to -2^31. And the saturated results are written to Rd. If any saturation is performed, set OV
  15023. * bit to 1.
  15024. *
  15025. * **Operations**:\n
  15026. * ~~~
  15027. * sa = Rs2[4:0];
  15028. * if (sa != 0) {
  15029. * res[(31+sa):0] = Rs1.W[x] << sa;
  15030. * if (res > (2^31)-1) {
  15031. * res = 0x7fffffff; OV = 1;
  15032. * } else if (res < -2^31) {
  15033. * res = 0x80000000; OV = 1;
  15034. * }
  15035. * Rd.W[x] = res[31:0];
  15036. * } else {
  15037. * Rd = Rs1;
  15038. * }
  15039. * for RV64: x=1...0
  15040. * ~~~
  15041. *
  15042. * \param [in] a unsigned long type of value stored in a
  15043. * \param [in] b unsigned int type of value stored in b
  15044. * \return value stored in unsigned long type
  15045. */
  15046. __STATIC_FORCEINLINE unsigned long __RV_KSLL32(unsigned long a, unsigned int b)
  15047. {
  15048. unsigned long result;
  15049. __ASM volatile("ksll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15050. return result;
  15051. }
  15052. /* ===== Inline Function End for 4.16. KSLL32 ===== */
  15053. /* ===== Inline Function Start for 4.17. KSLLI32 ===== */
  15054. /**
  15055. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15056. * \brief KSLLI32 (SIMD 32-bit Saturating Shift Left Logical Immediate)
  15057. * \details
  15058. * **Type**: SIMD (RV64 Only)
  15059. *
  15060. * **Syntax**:\n
  15061. * ~~~
  15062. * KSLLI32 Rd, Rs1, imm5u
  15063. * ~~~
  15064. *
  15065. * **Purpose**:\n
  15066. * Do 32-bit elements logical left shift operations with saturation simultaneously. The shift
  15067. * amount is an immediate value.
  15068. *
  15069. * **Description**:\n
  15070. * The 32-bit data elements in Rs1 are left-shifted logically. The shifted out bits are filled
  15071. * with zero and the shift amount is specified by the imm5u constant. Any shifted value greater than
  15072. * 2^31-1 is saturated to 2^31-1. Any shifted value smaller than -2^31 is saturated to -2^31. And the saturated
  15073. * results are written to Rd. If any saturation is performed, set OV bit to 1.
  15074. *
  15075. * **Operations**:\n
  15076. * ~~~
  15077. * sa = imm5u[4:0];
  15078. * if (sa != 0) {
  15079. * res[(31+sa):0] = Rs1.W[x] << sa;
  15080. * if (res > (2^31)-1) {
  15081. * res = 0x7fffffff; OV = 1;
  15082. * } else if (res < -2^31) {
  15083. * res = 0x80000000; OV = 1;
  15084. * }
  15085. * Rd.W[x] = res[31:0];
  15086. * } else {
  15087. * Rd = Rs1;
  15088. * }
  15089. * for RV64: x=1...0
  15090. * ~~~
  15091. *
  15092. * \param [in] a unsigned long type of value stored in a
  15093. * \param [in] b shift amount; must be a compile-time constant in the range 0..31 (encoded as imm5u)
  15094. * \return value stored in unsigned long type
  15095. */
  /*
   * Implemented as a macro (statement expression) because the shift amount b
   * must reach the assembler as an immediate (constraint "K").
   *
   * The argument a is evaluated into its temporary BEFORE the output temporary
   * is declared, and both temporaries carry macro-specific names. Otherwise a
   * call such as `result = __RV_KSLLI32(result, 2)` would make (a) refer to the
   * macro's own, still uninitialised, local instead of the caller's variable.
   */
  #define __RV_KSLLI32(a, b) \
  ({ \
      unsigned long __rv_kslli32_rs1 = (unsigned long)(a); \
      unsigned long __rv_kslli32_rd; \
      __ASM volatile("kslli32 %0, %1, %2" \
                     : "=r"(__rv_kslli32_rd) \
                     : "r"(__rv_kslli32_rs1), "K"(b)); \
      __rv_kslli32_rd; \
  })
  15103. /* ===== Inline Function End for 4.17. KSLLI32 ===== */
  15104. /* ===== Inline Function Start for 4.18.1. KSLRA32 ===== */
  15105. /**
  15106. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15107. * \brief KSLRA32 (SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  15108. * \details
  15109. * **Type**: SIMD (RV64 Only)
  15110. *
  15111. * **Syntax**:\n
  15112. * ~~~
  15113. * KSLRA32 Rd, Rs1, Rs2
  15114. * KSLRA32.u Rd, Rs1, Rs2
  15115. * ~~~
  15116. *
  15117. * **Purpose**:\n
  15118. * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  15119. * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  15120. * right shift.
  15121. *
  15122. * **Description**:\n
  15123. * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  15124. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  15125. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  15126. * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
  15127. * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
  15128. * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
  15129. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  15130. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  15131. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
  15132. * this instruction.
  15133. *
  15134. * **Operations**:\n
  15135. * ~~~
  15136. * if (Rs2[5:0] < 0) {
  15137. * sa = -Rs2[5:0];
  15138. * sa = (sa == 32)? 31 : sa;
  15139. * if (`.u` form) {
  15140. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  15141. * Rd.W[x] = res[31:0];
  15142. * } else {
  15143. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  15144. * }
  15145. * } else {
  15146. * sa = Rs2[4:0];
  15147. * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
  15148. * if (res > (2^31)-1) {
  15149. * res[31:0] = 0x7fffffff; OV = 1;
  15150. * } else if (res < -2^31) {
  15151. * res[31:0] = 0x80000000; OV = 1;
  15152. * }
  15153. * Rd.W[x] = res[31:0];
  15154. * }
  15155. * for RV64: x=1...0
  15156. * ~~~
  15157. *
  15158. * \param [in] a unsigned long type of value stored in a
  15159. * \param [in] b int type of value stored in b
  15160. * \return value stored in unsigned long type
  15161. */
  15162. __STATIC_FORCEINLINE unsigned long __RV_KSLRA32(unsigned long a, int b)
  15163. {
  15164. unsigned long result;
  15165. __ASM volatile("kslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15166. return result;
  15167. }
  15168. /* ===== Inline Function End for 4.18.1. KSLRA32 ===== */
  15169. /* ===== Inline Function Start for 4.18.2. KSLRA32.u ===== */
  15170. /**
  15171. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15172. * \brief KSLRA32.u (SIMD 32-bit Shift Left Logical with Saturation or Rounding Shift Right Arithmetic)
  15173. * \details
  15174. * **Type**: SIMD (RV64 Only)
  15175. *
  15176. * **Syntax**:\n
  15177. * ~~~
  15178. * KSLRA32 Rd, Rs1, Rs2
  15179. * KSLRA32.u Rd, Rs1, Rs2
  15180. * ~~~
  15181. *
  15182. * **Purpose**:\n
  15183. * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  15184. * Q31 saturation for the left shift. The `.u` form performs additional rounding up operations for the
  15185. * right shift.
  15186. *
  15187. * **Description**:\n
  15188. * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  15189. * based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means
  15190. * logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the
  15191. * absolute value of Rs2[5:0]. However, the behavior of `Rs2[5:0]==-2^5 (0x20)` is defined to be
  15192. * equivalent to the behavior of `Rs2[5:0]==-(2^5-1) (0x21)`.
  15193. * The left-shifted results are saturated to the 32-bit signed integer range of [-2^31, 2^31-1]. For the `.u`
  15194. * form of the instruction, the right-shifted results are added a 1 to the most significant discarded bit
  15195. * position for rounding effect. After the shift, saturation, or rounding, the final results are written to
  15196. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:6] will not affect
  15197. * this instruction.
  15198. *
  15199. * **Operations**:\n
  15200. * ~~~
  15201. * if (Rs2[5:0] < 0) {
  15202. * sa = -Rs2[5:0];
  15203. * sa = (sa == 32)? 31 : sa;
  15204. * if (`.u` form) {
  15205. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  15206. * Rd.W[x] = res[31:0];
  15207. * } else {
  15208. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  15209. * }
  15210. * } else {
  15211. * sa = Rs2[4:0];
  15212. * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
  15213. * if (res > (2^31)-1) {
  15214. * res[31:0] = 0x7fffffff; OV = 1;
  15215. * } else if (res < -2^31) {
  15216. * res[31:0] = 0x80000000; OV = 1;
  15217. * }
  15218. * Rd.W[x] = res[31:0];
  15219. * }
  15220. * for RV64: x=1...0
  15221. * ~~~
  15222. *
  15223. * \param [in] a unsigned long type of value stored in a
  15224. * \param [in] b int type of value stored in b
  15225. * \return value stored in unsigned long type
  15226. */
  15227. __STATIC_FORCEINLINE unsigned long __RV_KSLRA32_U(unsigned long a, int b)
  15228. {
  15229. unsigned long result;
  15230. __ASM volatile("kslra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15231. return result;
  15232. }
  15233. /* ===== Inline Function End for 4.18.2. KSLRA32.u ===== */
  15234. /* ===== Inline Function Start for 4.19. KSTAS32 ===== */
  15235. /**
  15236. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15237. * \brief KSTAS32 (SIMD 32-bit Signed Saturating Straight Addition & Subtraction)
  15238. * \details
  15239. * **Type**: SIMD (RV64 Only)
  15240. *
  15241. * **Syntax**:\n
  15242. * ~~~
  15243. * KSTAS32 Rd, Rs1, Rs2
  15244. * ~~~
  15245. *
  15246. * **Purpose**:\n
  15247. * Do 32-bit signed integer element saturating addition and 32-bit signed integer element
  15248. * saturating subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
  15249. * elements.
  15250. *
  15251. * **Description**:\n
  15252. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  15253. * integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit integer element in [31:0] of
  15254. * Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number
  15255. * range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated
  15256. * results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
  15257. *
  15258. * **Operations**:\n
  15259. * ~~~
  15260. * res[1] = Rs1.W[1] + Rs2.W[1];
  15261. * res[0] = Rs1.W[0] - Rs2.W[0];
  15262. * if (res[x] > (2^31)-1) {
  15263. * res[x] = (2^31)-1;
  15264. * OV = 1;
  15265. * } else if (res[x] < -2^31) {
  15266. * res[x] = -2^31;
  15267. * OV = 1;
  15268. * }
  15269. * Rd.W[1] = res[1];
  15270. * Rd.W[0] = res[0];
  15271. * for RV64, x=1...0
  15272. * ~~~
  15273. *
  15274. * \param [in] a unsigned long type of value stored in a
  15275. * \param [in] b unsigned long type of value stored in b
  15276. * \return value stored in unsigned long type
  15277. */
  15278. __STATIC_FORCEINLINE unsigned long __RV_KSTAS32(unsigned long a, unsigned long b)
  15279. {
  15280. unsigned long result;
  15281. __ASM volatile("kstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15282. return result;
  15283. }
  15284. /* ===== Inline Function End for 4.19. KSTAS32 ===== */
  15285. /* ===== Inline Function Start for 4.20. KSTSA32 ===== */
  15286. /**
  15287. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15288. * \brief KSTSA32 (SIMD 32-bit Signed Saturating Straight Subtraction & Addition)
  15289. * \details
  15290. * **Type**: SIMD (RV64 Only)
  15291. *
  15292. * **Syntax**:\n
  15293. * ~~~
  15294. * KSTSA32 Rd, Rs1, Rs2
  15295. * ~~~
  15296. *
  15297. * **Purpose**:\n
  15298. * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element
  15299. * saturating addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit
  15300. * elements.
  15301. * **Description**:\n
  15302. * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
  15303. * element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with
  15304. * the 32-bit integer element in [31:0] of Rs2. If any of the results are beyond the Q31 number range (
  15305. * -2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are
  15306. * written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  15307. *
  15308. * **Operations**:\n
  15309. * ~~~
  15310. * res[1] = Rs1.W[1] - Rs2.W[1];
  15311. * res[0] = Rs1.W[0] + Rs2.W[0];
  15312. * if (res[x] > (2^31)-1) {
  15313. * res[x] = (2^31)-1;
  15314. * OV = 1;
  15315. * } else if (res[x] < -2^31) {
  15316. * res[x] = -2^31;
  15317. * OV = 1;
  15318. * }
  15319. * Rd.W[1] = res[1];
  15320. * Rd.W[0] = res[0];
  15321. * for RV64, x=1...0
  15322. * ~~~
  15323. *
  15324. * \param [in] a unsigned long type of value stored in a
  15325. * \param [in] b unsigned long type of value stored in b
  15326. * \return value stored in unsigned long type
  15327. */
  15328. __STATIC_FORCEINLINE unsigned long __RV_KSTSA32(unsigned long a, unsigned long b)
  15329. {
  15330. unsigned long result;
  15331. __ASM volatile("kstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15332. return result;
  15333. }
  15334. /* ===== Inline Function End for 4.20. KSTSA32 ===== */
  15335. /* ===== Inline Function Start for 4.21. KSUB32 ===== */
  15336. /**
  15337. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15338. * \brief KSUB32 (SIMD 32-bit Signed Saturating Subtraction)
  15339. * \details
  15340. * **Type**: SIMD (RV64 Only)
  15341. *
  15342. * **Syntax**:\n
  15343. * ~~~
  15344. * KSUB32 Rd, Rs1, Rs2
  15345. * ~~~
  15346. *
  15347. * **Purpose**:\n
  15348. * Do 32-bit signed integer elements saturating subtractions simultaneously.
  15349. *
  15350. * **Description**:\n
  15351. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
  15352. * signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <=
  15353. * 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  15354. * Rd.
  15355. *
  15356. * **Operations**:\n
  15357. * ~~~
  15358. * res[x] = Rs1.W[x] - Rs2.W[x];
  15359. * if (res[x] > (2^31)-1) {
  15360. * res[x] = (2^31)-1;
  15361. * OV = 1;
  15362. * } else if (res[x] < -2^31) {
  15363. * res[x] = -2^31;
  15364. * OV = 1;
  15365. * }
  15366. * Rd.W[x] = res[x];
  15367. * for RV64: x=1...0
  15368. * ~~~
  15369. *
  15370. * \param [in] a unsigned long type of value stored in a
  15371. * \param [in] b unsigned long type of value stored in b
  15372. * \return value stored in unsigned long type
  15373. */
  15374. __STATIC_FORCEINLINE unsigned long __RV_KSUB32(unsigned long a, unsigned long b)
  15375. {
  15376. unsigned long result;
  15377. __ASM volatile("ksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15378. return result;
  15379. }
  15380. /* ===== Inline Function End for 4.21. KSUB32 ===== */
  15381. /* ===== Inline Function Start for 4.22.1. PKBB32 ===== */
  15382. /**
  15383. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15384. * \brief PKBB32 (Pack Two 32-bit Data from Both Bottom Half)
  15385. * \details
  15386. * **Type**: DSP (RV64 Only)
  15387. *
  15388. * **Syntax**:\n
  15389. * ~~~
  15390. * PKBB32 Rd, Rs1, Rs2
  15391. * PKBT32 Rd, Rs1, Rs2
  15392. * PKTT32 Rd, Rs1, Rs2
  15393. * PKTB32 Rd, Rs1, Rs2
  15394. * ~~~
  15395. *
  15396. * **Purpose**:\n
  15397. * Pack 32-bit data from 64-bit chunks in two registers.
  15398. * * PKBB32: bottom.bottom
  15399. * * PKBT32: bottom.top
  15400. * * PKTT32: top.top
  15401. * * PKTB32: top.bottom
  15402. *
  15403. * **Description**:\n
  15404. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15405. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15406. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15407. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15408. *
  15409. * **Operations**:\n
  15410. * ~~~
  15411. * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
  15412. * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
  15413. * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
  15414. * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
  15415. * ~~~
  15416. *
  15417. * \param [in] a unsigned long type of value stored in a
  15418. * \param [in] b unsigned long type of value stored in b
  15419. * \return value stored in unsigned long type
  15420. */
  15421. __STATIC_FORCEINLINE unsigned long __RV_PKBB32(unsigned long a, unsigned long b)
  15422. {
  15423. unsigned long result;
  15424. __ASM volatile("pkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15425. return result;
  15426. }
  15427. /* ===== Inline Function End for 4.22.1. PKBB32 ===== */
  15428. /* ===== Inline Function Start for 4.22.2. PKBT32 ===== */
  15429. /**
  15430. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15431. * \brief PKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
  15432. * \details
  15433. * **Type**: DSP (RV64 Only)
  15434. *
  15435. * **Syntax**:\n
  15436. * ~~~
  15437. * PKBB32 Rd, Rs1, Rs2
  15438. * PKBT32 Rd, Rs1, Rs2
  15439. * PKTT32 Rd, Rs1, Rs2
  15440. * PKTB32 Rd, Rs1, Rs2
  15441. * ~~~
  15442. *
  15443. * **Purpose**:\n
  15444. * Pack 32-bit data from 64-bit chunks in two registers.
  15445. * * PKBB32: bottom.bottom
  15446. * * PKBT32: bottom.top
  15447. * * PKTT32: top.top
  15448. * * PKTB32: top.bottom
  15449. *
  15450. * **Description**:\n
  15451. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15452. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15453. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15454. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15455. *
  15456. * **Operations**:\n
  15457. * ~~~
  15458. * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
  15459. * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
  15460. * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
  15461. * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
  15462. * ~~~
  15463. *
  15464. * \param [in] a unsigned long type of value stored in a
  15465. * \param [in] b unsigned long type of value stored in b
  15466. * \return value stored in unsigned long type
  15467. */
  15468. __STATIC_FORCEINLINE unsigned long __RV_PKBT32(unsigned long a, unsigned long b)
  15469. {
  15470. unsigned long result;
  15471. __ASM volatile("pkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15472. return result;
  15473. }
  15474. /* ===== Inline Function End for 4.22.2. PKBT32 ===== */
  15475. /* ===== Inline Function Start for 4.22.3. PKTT32 ===== */
  15476. /**
  15477. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15478. * \brief PKTT32 (Pack Two 32-bit Data from Both Top Half)
  15479. * \details
  15480. * **Type**: DSP (RV64 Only)
  15481. *
  15482. * **Syntax**:\n
  15483. * ~~~
  15484. * PKBB32 Rd, Rs1, Rs2
  15485. * PKBT32 Rd, Rs1, Rs2
  15486. * PKTT32 Rd, Rs1, Rs2
  15487. * PKTB32 Rd, Rs1, Rs2
  15488. * ~~~
  15489. *
  15490. * **Purpose**:\n
  15491. * Pack 32-bit data from 64-bit chunks in two registers.
  15492. * * PKBB32: bottom.bottom
  15493. * * PKBT32: bottom.top
  15494. * * PKTT32: top.top
  15495. * * PKTB32: top.bottom
  15496. *
  15497. * **Description**:\n
  15498. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15499. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15500. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15501. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15502. *
  15503. * **Operations**:\n
  15504. * ~~~
  15505. * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
  15506. * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
  15507. * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
  15508. * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
  15509. * ~~~
  15510. *
  15511. * \param [in] a unsigned long type of value stored in a
  15512. * \param [in] b unsigned long type of value stored in b
  15513. * \return value stored in unsigned long type
  15514. */
  15515. __STATIC_FORCEINLINE unsigned long __RV_PKTT32(unsigned long a, unsigned long b)
  15516. {
  15517. unsigned long result;
  15518. __ASM volatile("pktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15519. return result;
  15520. }
  15521. /* ===== Inline Function End for 4.22.3. PKTT32 ===== */
  15522. /* ===== Inline Function Start for 4.22.4. PKTB32 ===== */
  15523. /**
  15524. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PACK
  15525. * \brief PKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
  15526. * \details
  15527. * **Type**: DSP (RV64 Only)
  15528. *
  15529. * **Syntax**:\n
  15530. * ~~~
  15531. * PKBB32 Rd, Rs1, Rs2
  15532. * PKBT32 Rd, Rs1, Rs2
  15533. * PKTT32 Rd, Rs1, Rs2
  15534. * PKTB32 Rd, Rs1, Rs2
  15535. * ~~~
  15536. *
  15537. * **Purpose**:\n
  15538. * Pack 32-bit data from 64-bit chunks in two registers.
  15539. * * PKBB32: bottom.bottom
  15540. * * PKBT32: bottom.top
  15541. * * PKTT32: top.top
  15542. * * PKTB32: top.bottom
  15543. *
  15544. * **Description**:\n
  15545. * (PKBB32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15546. * (PKBT32) moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15547. * (PKTT32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  15548. * (PKTB32) moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  15549. *
  15550. * **Operations**:\n
  15551. * ~~~
  15552. * Rd = CONCAT(Rs1.W[0], Rs2.W[0]); // PKBB32
  15553. * Rd = CONCAT(Rs1.W[0], Rs2.W[1]); // PKBT32
  15554. * Rd = CONCAT(Rs1.W[1], Rs2.W[1]); // PKTT32
  15555. * Rd = CONCAT(Rs1.W[1], Rs2.W[0]); // PKTB32
  15556. * ~~~
  15557. *
  15558. * \param [in] a unsigned long type of value stored in a
  15559. * \param [in] b unsigned long type of value stored in b
  15560. * \return value stored in unsigned long type
  15561. */
  15562. __STATIC_FORCEINLINE unsigned long __RV_PKTB32(unsigned long a, unsigned long b)
  15563. {
  15564. unsigned long result;
  15565. __ASM volatile("pktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15566. return result;
  15567. }
  15568. /* ===== Inline Function End for 4.22.4. PKTB32 ===== */
  15569. /* ===== Inline Function Start for 4.23. RADD32 ===== */
  15570. /**
  15571. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15572. * \brief RADD32 (SIMD 32-bit Signed Halving Addition)
  15573. * \details
  15574. * **Type**: SIMD (RV64 Only)
  15575. *
  15576. * **Syntax**:\n
  15577. * ~~~
  15578. * RADD32 Rd, Rs1, Rs2
  15579. * ~~~
  15580. *
  15581. * **Purpose**:\n
  15582. * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid
  15583. * overflow or saturation.
  15584. *
  15585. * **Description**:\n
  15586. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed
  15587. * integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to
  15588. * Rd.
  15589. *
  15590. * **Examples**:\n
  15591. * ~~~
  15592. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x7FFFFFFF Rd = 0x7FFFFFFF
  15593. * * Rs1 = 0x80000000, Rs2 = 0x80000000 Rd = 0x80000000
  15594. * * Rs1 = 0x40000000, Rs2 = 0x80000000 Rd = 0xE0000000
  15595. * ~~~
  15596. *
  15597. * **Operations**:\n
  15598. * ~~~
  15599. * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) s>> 1;
  15600. * for RV64: x=1...0
  15601. * ~~~
  15602. *
  15603. * \param [in] a unsigned long type of value stored in a
  15604. * \param [in] b unsigned long type of value stored in b
  15605. * \return value stored in unsigned long type
  15606. */
  15607. __STATIC_FORCEINLINE unsigned long __RV_RADD32(unsigned long a, unsigned long b)
  15608. {
  15609. unsigned long result;
  15610. __ASM volatile("radd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15611. return result;
  15612. }
  15613. /* ===== Inline Function End for 4.23. RADD32 ===== */
  15614. /* ===== Inline Function Start for 4.24. RCRAS32 ===== */
  15615. /**
  15616. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15617. * \brief RCRAS32 (SIMD 32-bit Signed Halving Cross Addition & Subtraction)
  15618. * \details
  15619. * **Type**: SIMD (RV64 Only)
  15620. *
  15621. * **Syntax**:\n
  15622. * ~~~
  15623. * RCRAS32 Rd, Rs1, Rs2
  15624. * ~~~
  15625. *
  15626. * **Purpose**:\n
  15627. * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
  15628. * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
  15629. * avoid overflow or saturation.
  15630. *
  15631. * **Description**:\n
  15632. * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
  15633. * signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of
  15634. * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
  15635. * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
  15636. * for subtraction.
  15637. *
  15638. * **Examples**:\n
  15639. * ~~~
  15640. * Please see `RADD32` and `RSUB32` instructions.
  15641. * ~~~
  15642. *
  15643. * **Operations**:\n
  15644. * ~~~
  15645. * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
  15646. * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
  15647. * ~~~
  15648. *
  15649. * \param [in] a unsigned long type of value stored in a
  15650. * \param [in] b unsigned long type of value stored in b
  15651. * \return value stored in unsigned long type
  15652. */
  15653. __STATIC_FORCEINLINE unsigned long __RV_RCRAS32(unsigned long a, unsigned long b)
  15654. {
  15655. unsigned long result;
  15656. __ASM volatile("rcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15657. return result;
  15658. }
  15659. /* ===== Inline Function End for 4.24. RCRAS32 ===== */
  15660. /* ===== Inline Function Start for 4.25. RCRSA32 ===== */
  15661. /**
  15662. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15663. * \brief RCRSA32 (SIMD 32-bit Signed Halving Cross Subtraction & Addition)
  15664. * \details
  15665. * **Type**: SIMD (RV64 Only)
  15666. *
  15667. * **Syntax**:\n
  15668. * ~~~
  15669. * RCRSA32 Rd, Rs1, Rs2
  15670. * ~~~
  15671. *
  15672. * **Purpose**:\n
  15673. * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
  15674. * a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to
  15675. * avoid overflow or saturation.
  15676. *
  15677. * **Description**:\n
  15678. * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the
  15679. * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [31:0]
  15680. * of Rs1 with the 32-bit signed integer element in [63:32] of Rs2. The two results are first
  15681. * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of
  15682. * Rd for addition.
  15683. *
  15684. * **Examples**:\n
  15685. * ~~~
  15686. * Please see `RADD32` and `RSUB32` instructions.
  15687. * ~~~
  15688. *
  15689. * **Operations**:\n
  15690. * ~~~
  15691. * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
  15692. * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
  15693. * ~~~
  15694. *
  15695. * \param [in] a unsigned long type of value stored in a
  15696. * \param [in] b unsigned long type of value stored in b
  15697. * \return value stored in unsigned long type
  15698. */
  15699. __STATIC_FORCEINLINE unsigned long __RV_RCRSA32(unsigned long a, unsigned long b)
  15700. {
  15701. unsigned long result;
  15702. __ASM volatile("rcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15703. return result;
  15704. }
  15705. /* ===== Inline Function End for 4.25. RCRSA32 ===== */
  15706. /* ===== Inline Function Start for 4.26. RSTAS32 ===== */
  15707. /**
  15708. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15709. * \brief RSTAS32 (SIMD 32-bit Signed Halving Straight Addition & Subtraction)
  15710. * \details
  15711. * **Type**: SIMD (RV64 Only)
  15712. *
  15713. * **Syntax**:\n
  15714. * ~~~
  15715. * RSTAS32 Rd, Rs1, Rs2
  15716. * ~~~
  15717. *
  15718. * **Purpose**:\n
  15719. * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in
  15720. * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
  15721. * halved to avoid overflow or saturation.
  15722. *
  15723. * **Description**:\n
  15724. * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit
  15725. * signed integer element in [63:32] of Rs2, and subtracts the 32-bit signed integer element in [31:0] of
  15726. * Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first
  15727. * arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd
  15728. * for subtraction.
  15729. *
  15730. * **Examples**:\n
  15731. * ~~~
  15732. * Please see `RADD32` and `RSUB32` instructions.
  15733. * ~~~
  15734. *
  15735. * **Operations**:\n
  15736. * ~~~
  15737. * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) s>> 1;
  15738. * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) s>> 1;
  15739. * ~~~
  15740. *
  15741. * \param [in] a unsigned long type of value stored in a
  15742. * \param [in] b unsigned long type of value stored in b
  15743. * \return value stored in unsigned long type
  15744. */
  15745. __STATIC_FORCEINLINE unsigned long __RV_RSTAS32(unsigned long a, unsigned long b)
  15746. {
  15747. unsigned long result;
  15748. __ASM volatile("rstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15749. return result;
  15750. }
  15751. /* ===== Inline Function End for 4.26. RSTAS32 ===== */
  15752. /* ===== Inline Function Start for 4.27. RSTSA32 ===== */
  15753. /**
  15754. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15755. * \brief RSTSA32 (SIMD 32-bit Signed Halving Straight Subtraction & Addition)
  15756. * \details
  15757. * **Type**: SIMD (RV64 Only)
  15758. *
  15759. * **Syntax**:\n
  15760. * ~~~
  15761. * RSTSA32 Rd, Rs1, Rs2
  15762. * ~~~
  15763. *
  15764. * **Purpose**:\n
  15765. * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in
  15766. * a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The results are
  15767. * halved to avoid overflow or saturation.
  15768. *
  15769. * **Description**:\n
  15770. * This instruction subtracts the 32-bit signed integer element in [63:32] of Rs2 from the
  15771. * 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [31:0]
  15772. * of Rs1 with the 32-bit signed integer element in [31:0] of Rs2. The two results are first arithmetically
  15773. * right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  15774. *
  15775. * **Examples**:\n
  15776. * ~~~
  15777. * Please see `RADD32` and `RSUB32` instructions.
  15778. * ~~~
  15779. *
  15780. * **Operations**:\n
  15781. * ~~~
  15782. * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) s>> 1;
  15783. * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) s>> 1;
  15784. * ~~~
  15785. *
  15786. * \param [in] a unsigned long type of value stored in a
  15787. * \param [in] b unsigned long type of value stored in b
  15788. * \return value stored in unsigned long type
  15789. */
  15790. __STATIC_FORCEINLINE unsigned long __RV_RSTSA32(unsigned long a, unsigned long b)
  15791. {
  15792. unsigned long result;
  15793. __ASM volatile("rstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15794. return result;
  15795. }
  15796. /* ===== Inline Function End for 4.27. RSTSA32 ===== */
  15797. /* ===== Inline Function Start for 4.28. RSUB32 ===== */
  15798. /**
  15799. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  15800. * \brief RSUB32 (SIMD 32-bit Signed Halving Subtraction)
  15801. * \details
  15802. * **Type**: SIMD (RV64 Only)
  15803. *
  15804. * **Syntax**:\n
  15805. * ~~~
  15806. * RSUB32 Rd, Rs1, Rs2
  15807. * ~~~
  15808. *
  15809. * **Purpose**:\n
  15810. * Do 32-bit signed integer element subtractions simultaneously. The results are halved to
  15811. * avoid overflow or saturation.
  15812. *
  15813. * **Description**:\n
  15814. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit
  15815. * signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then
  15816. * written to Rd.
  15817. *
  15818. * **Examples**:\n
  15819. * ~~~
  15820. * * Rs1 = 0x7FFFFFFF, Rs2 = 0x80000000, Rd = 0x7FFFFFFF
  15821. * * Rs1 = 0x80000000, Rs2 = 0x7FFFFFFF, Rd = 0x80000000
  15822. * * Rs1 = 0x80000000, Rs2 = 0x40000000, Rd = 0xA0000000
  15823. * ~~~
  15824. *
  15825. * **Operations**:\n
  15826. * ~~~
  15827. * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
  15828. * for RV64: x=1...0
  15829. * ~~~
  15830. *
  15831. * \param [in] a unsigned long type of value stored in a
  15832. * \param [in] b unsigned long type of value stored in b
  15833. * \return value stored in unsigned long type
  15834. */
  15835. __STATIC_FORCEINLINE unsigned long __RV_RSUB32(unsigned long a, unsigned long b)
  15836. {
  15837. unsigned long result;
  15838. __ASM volatile("rsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15839. return result;
  15840. }
  15841. /* ===== Inline Function End for 4.28. RSUB32 ===== */
  15842. /* ===== Inline Function Start for 4.29. SLL32 ===== */
  15843. /**
  15844. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15845. * \brief SLL32 (SIMD 32-bit Shift Left Logical)
  15846. * \details
  15847. * **Type**: SIMD (RV64 Only)
  15848. *
  15849. * **Syntax**:\n
  15850. * ~~~
  15851. * SLL32 Rd, Rs1, Rs2
  15852. * ~~~
  15853. *
  15854. * **Purpose**:\n
  15855. * Do 32-bit elements logical left shift operations simultaneously. The shift amount is a
  15856. * variable from a GPR.
  15857. *
  15858. * **Description**:\n
  15859. * The 32-bit elements in Rs1 are left-shifted logically. And the results are written to Rd.
  15860. * The vacated bits are filled with zero and the shift amount is specified by the low-order 5 bits of
  15861. * the value in the Rs2 register.
  15862. *
  15863. * **Operations**:\n
  15864. * ~~~
  15865. * sa = Rs2[4:0];
  15866. * Rd.W[x] = Rs1.W[x] << sa;
  15867. * for RV64: x=1...0
  15868. * ~~~
  15869. *
  15870. * \param [in] a unsigned long type of value stored in a
  15871. * \param [in] b unsigned int type of value stored in b
  15872. * \return value stored in unsigned long type
  15873. */
  15874. __STATIC_FORCEINLINE unsigned long __RV_SLL32(unsigned long a, unsigned int b)
  15875. {
  15876. unsigned long result;
  15877. __ASM volatile("sll32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15878. return result;
  15879. }
  15880. /* ===== Inline Function End for 4.29. SLL32 ===== */
  15881. /* ===== Inline Function Start for 4.30. SLLI32 ===== */
  15882. /**
  15883. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  15884. * \brief SLLI32 (SIMD 32-bit Shift Left Logical Immediate)
  15885. * \details
  15886. * **Type**: SIMD (RV64 Only)
  15887. *
  15888. * **Syntax**:\n
  15889. * ~~~
  15890. * SLLI32 Rd, Rs1, imm5u[4:0]
  15891. * ~~~
  15892. *
  15893. * **Purpose**:\n
  15894. * Do 32-bit element logical left shift operations simultaneously. The shift amount is an
  15895. * immediate value.
  15896. *
  15897. * **Description**:\n
  15898. * The 32-bit elements in Rs1 are left-shifted logically. The vacated bits are filled with
  15899. * zero and the shift amount is specified by the imm5u[4:0] constant. And the results are written to Rd.
  15900. *
  15901. * **Operations**:\n
  15902. * ~~~
  15903. * sa = imm5u[4:0];
  15904. * Rd.W[x] = Rs1.W[x] << sa;
  15905. * for RV64: x=1...0
  15906. * ~~~
  15907. *
  15908. * \param [in] a unsigned long type of value stored in a
  15909. * \param [in] b unsigned int type of value stored in b
  15910. * \return value stored in unsigned long type
  15911. */
  /*
   * Statement-expression wrapper around the `slli32` instruction.
   *
   * `a` is evaluated exactly once, and before the result temporary is
   * declared, so a caller argument that happens to be named like that
   * temporary is not captured by the macro's own local.
   * `b` is bound with the "K" constraint, which GCC documents for RISC-V as a
   * 5-bit unsigned immediate; it therefore has to be a compile-time constant.
   */
  #define __RV_SLLI32(a, b)    \
      ({    \
          unsigned long __a = (unsigned long)(a);    \
          unsigned long __res;    \
          __ASM volatile("slli32 %0, %1, %2" : "=r"(__res) : "r"(__a), "K"(b));    \
          __res;    \
      })
  15919. /* ===== Inline Function End for 4.30. SLLI32 ===== */
  15920. /* ===== Inline Function Start for 4.31. SMAX32 ===== */
  15921. /**
  15922. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  15923. * \brief SMAX32 (SIMD 32-bit Signed Maximum)
  15924. * \details
  15925. * **Type**: SIMD (RV64 Only)
  15926. *
  15927. * **Syntax**:\n
  15928. * ~~~
  15929. * SMAX32 Rd, Rs1, Rs2
  15930. * ~~~
  15931. *
  15932. * **Purpose**:\n
  15933. * Do 32-bit signed integer elements finding maximum operations simultaneously.
  15934. *
  15935. * **Description**:\n
  15936. * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
  15937. * signed integer elements in Rs2 and selects the greater number of each pair. The
  15938. * selected results are written to Rd.
  15939. *
  15940. * **Operations**:\n
  15941. * ~~~
  15942. * Rd.W[x] = (Rs1.W[x] > Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  15943. * for RV64: x=1...0
  15944. * ~~~
  15945. *
  15946. * \param [in] a unsigned long type of value stored in a
  15947. * \param [in] b unsigned long type of value stored in b
  15948. * \return value stored in unsigned long type
  15949. */
  15950. __STATIC_FORCEINLINE unsigned long __RV_SMAX32(unsigned long a, unsigned long b)
  15951. {
  15952. unsigned long result;
  15953. __ASM volatile("smax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  15954. return result;
  15955. }
  15956. /* ===== Inline Function End for 4.31. SMAX32 ===== */
  15957. /* ===== Inline Function Start for 4.32.1. SMBB32 ===== */
  15958. /**
  15959. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
  15960. * \brief SMBB32 (Signed Multiply Bottom Word & Bottom Word)
  15961. * \details
  15962. * **Type**: DSP (RV64 Only)
  15963. *
  15964. * **Syntax**:\n
  15965. * ~~~
  15966. * SMBB32 Rd, Rs1, Rs2
  15967. * SMBT32 Rd, Rs1, Rs2
  15968. * SMTT32 Rd, Rs1, Rs2
  15969. * ~~~
  15970. *
  15971. * **Purpose**:\n
  15972. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
  15973. * register and write the 64-bit result to a third register.
  15974. * * SMBB32: bottom*bottom
  15975. * * SMBT32: bottom*top
  15976. * * SMTT32: top*top
  15977. *
  15978. * **Description**:\n
  15979. * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  15980. * element of Rs2. It is actually an alias of `MULSR64` instruction.
  15981. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  15982. * element of Rs2.
  15983. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
  15984. * of Rs2.
  15985. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
  15986. * signed integers.
  15987. *
  15988. * **Operations**:\n
  15989. * ~~~
  15990. * res = Rs1.W[0] * Rs2.W[0] (SMBB32), Rs1.W[0] * Rs2.W[1] (SMBT32), or Rs1.W[1] * Rs2.W[1] (SMTT32);
  15991. * Rd = res;
  15992. * ~~~
  15993. *
  15994. * \param [in] a unsigned long type of value stored in a
  15995. * \param [in] b unsigned long type of value stored in b
  15996. * \return value stored in long type
  15997. */
  15998. __STATIC_FORCEINLINE long __RV_SMBB32(unsigned long a, unsigned long b)
  15999. {
  16000. long result;
  16001. __ASM volatile("smbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16002. return result;
  16003. }
  16004. /* ===== Inline Function End for 4.32.1. SMBB32 ===== */
  16005. /* ===== Inline Function Start for 4.32.2. SMBT32 ===== */
  16006. /**
  16007. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
  16008. * \brief SMBT32 (Signed Multiply Bottom Word & Top Word)
  16009. * \details
  16010. * **Type**: DSP (RV64 Only)
  16011. *
  16012. * **Syntax**:\n
  16013. * ~~~
  16014. * SMBB32 Rd, Rs1, Rs2
  16015. * SMBT32 Rd, Rs1, Rs2
  16016. * SMTT32 Rd, Rs1, Rs2
  16017. * ~~~
  16018. *
  16019. * **Purpose**:\n
  16020. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
  16021. * register and write the 64-bit result to a third register.
  16022. * * SMBB32: bottom*bottom
  16023. * * SMBT32: bottom*top
  16024. * * SMTT32: top*top
  16025. *
  16026. * **Description**:\n
  16027. * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16028. * element of Rs2. It is actually an alias of `MULSR64` instruction.
  16029. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16030. * element of Rs2.
  16031. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
  16032. * of Rs2.
  16033. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
  16034. * signed integers.
  16035. *
  16036. * **Operations**:\n
  16037. * ~~~
  16038. * res = Rs1.W[0] * Rs2.W[0] (SMBB32), Rs1.W[0] * Rs2.W[1] (SMBT32), or Rs1.W[1] * Rs2.W[1] (SMTT32);
  16039. * Rd = res;
  16040. * ~~~
  16041. *
  16042. * \param [in] a unsigned long type of value stored in a
  16043. * \param [in] b unsigned long type of value stored in b
  16044. * \return value stored in long type
  16045. */
  16046. __STATIC_FORCEINLINE long __RV_SMBT32(unsigned long a, unsigned long b)
  16047. {
  16048. long result;
  16049. __ASM volatile("smbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16050. return result;
  16051. }
  16052. /* ===== Inline Function End for 4.32.2. SMBT32 ===== */
  16053. /* ===== Inline Function Start for 4.32.3. SMTT32 ===== */
  16054. /**
  16055. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_MULT
  16056. * \brief SMTT32 (Signed Multiply Top Word & Top Word)
  16057. * \details
  16058. * **Type**: DSP (RV64 Only)
  16059. *
  16060. * **Syntax**:\n
  16061. * ~~~
  16062. * SMBB32 Rd, Rs1, Rs2
  16063. * SMBT32 Rd, Rs1, Rs2
  16064. * SMTT32 Rd, Rs1, Rs2
  16065. * ~~~
  16066. *
  16067. * **Purpose**:\n
  16068. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another
  16069. * register and write the 64-bit result to a third register.
  16070. * * SMBB32: bottom*bottom
  16071. * * SMBT32: bottom*top
  16072. * * SMTT32: top*top
  16073. *
  16074. * **Description**:\n
  16075. * For the `SMBB32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16076. * element of Rs2. It is actually an alias of `MULSR64` instruction.
  16077. * For the `SMBT32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16078. * element of Rs2.
  16079. * For the `SMTT32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit element
  16080. * of Rs2.
  16081. * The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as
  16082. * signed integers.
  16083. *
  16084. * **Operations**:\n
  16085. * ~~~
  16086. * res = Rs1.W[0] * Rs2.W[0] (SMBB32), Rs1.W[0] * Rs2.W[1] (SMBT32), or Rs1.W[1] * Rs2.W[1] (SMTT32);
  16087. * Rd = res;
  16088. * ~~~
  16089. *
  16090. * \param [in] a unsigned long type of value stored in a
  16091. * \param [in] b unsigned long type of value stored in b
  16092. * \return value stored in long type
  16093. */
  16094. __STATIC_FORCEINLINE long __RV_SMTT32(unsigned long a, unsigned long b)
  16095. {
  16096. long result;
  16097. __ASM volatile("smtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16098. return result;
  16099. }
  16100. /* ===== Inline Function End for 4.32.3. SMTT32 ===== */
  16101. /* ===== Inline Function Start for 4.33.1. SMDS32 ===== */
  16102. /**
  16103. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  16104. * \brief SMDS32 (Signed Multiply Two Words and Subtract)
  16105. * \details
  16106. * **Type**: DSP (RV64 Only)
  16107. *
  16108. * **Syntax**:\n
  16109. * ~~~
  16110. * SMDS32 Rd, Rs1, Rs2
  16111. * SMDRS32 Rd, Rs1, Rs2
  16112. * SMXDS32 Rd, Rs1, Rs2
  16113. * ~~~
  16114. *
  16115. * **Purpose**:\n
  16116. * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
  16117. * perform a subtraction operation between the two 64-bit results.
  16118. * * SMDS32: top*top - bottom*bottom
  16119. * * SMDRS32: bottom*bottom - top*top
  16120. * * SMXDS32: top*bottom - bottom*top
  16121. *
  16122. * **Description**:\n
  16123. * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16124. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16125. * Rs1 with the top 32-bit element of Rs2.
  16126. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
  16127. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  16128. * element of Rs1 with the bottom 32-bit element of Rs2.
  16129. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16130. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16131. * Rs1 with the bottom 32-bit element of Rs2.
  16132. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  16133. * integers.
  16134. *
  16135. * **Operations**:\n
  16136. * ~~~
  16137. * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
  16138. * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
  16139. * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
  16140. * ~~~
  16141. *
  16142. * \param [in] a unsigned long type of value stored in a
  16143. * \param [in] b unsigned long type of value stored in b
  16144. * \return value stored in long type
  16145. */
  16146. __STATIC_FORCEINLINE long __RV_SMDS32(unsigned long a, unsigned long b)
  16147. {
  16148. long result;
  16149. __ASM volatile("smds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16150. return result;
  16151. }
  16152. /* ===== Inline Function End for 4.33.1. SMDS32 ===== */
  16153. /* ===== Inline Function Start for 4.33.2. SMDRS32 ===== */
  16154. /**
  16155. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  16156. * \brief SMDRS32 (Signed Multiply Two Words and Reverse Subtract)
  16157. * \details
  16158. * **Type**: DSP (RV64 Only)
  16159. *
  16160. * **Syntax**:\n
  16161. * ~~~
  16162. * SMDS32 Rd, Rs1, Rs2
  16163. * SMDRS32 Rd, Rs1, Rs2
  16164. * SMXDS32 Rd, Rs1, Rs2
  16165. * ~~~
  16166. *
  16167. * **Purpose**:\n
  16168. * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
  16169. * perform a subtraction operation between the two 64-bit results.
  16170. * * SMDS32: top*top - bottom*bottom
  16171. * * SMDRS32: bottom*bottom - top*top
  16172. * * SMXDS32: top*bottom - bottom*top
  16173. *
  16174. * **Description**:\n
  16175. * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16176. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16177. * Rs1 with the top 32-bit element of Rs2.
  16178. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
  16179. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  16180. * element of Rs1 with the bottom 32-bit element of Rs2.
  16181. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16182. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16183. * Rs1 with the bottom 32-bit element of Rs2.
  16184. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  16185. * integers.
  16186. *
  16187. * **Operations**:\n
  16188. * ~~~
  16189. * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
  16190. * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
  16191. * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
  16192. * ~~~
  16193. *
  16194. * \param [in] a unsigned long type of value stored in a
  16195. * \param [in] b unsigned long type of value stored in b
  16196. * \return value stored in long type
  16197. */
  16198. __STATIC_FORCEINLINE long __RV_SMDRS32(unsigned long a, unsigned long b)
  16199. {
  16200. long result;
  16201. __ASM volatile("smdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16202. return result;
  16203. }
  16204. /* ===== Inline Function End for 4.33.2. SMDRS32 ===== */
  16205. /* ===== Inline Function Start for 4.33.3. SMXDS32 ===== */
  16206. /**
  16207. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_32B_PARALLEL_MAC
  16208. * \brief SMXDS32 (Signed Crossed Multiply Two Words and Subtract)
  16209. * \details
  16210. * **Type**: DSP (RV64 Only)
  16211. *
  16212. * **Syntax**:\n
  16213. * ~~~
  16214. * SMDS32 Rd, Rs1, Rs2
  16215. * SMDRS32 Rd, Rs1, Rs2
  16216. * SMXDS32 Rd, Rs1, Rs2
  16217. * ~~~
  16218. *
  16219. * **Purpose**:\n
  16220. * Do two signed 32-bit multiplications from the 32-bit elements of two registers; and then
  16221. * perform a subtraction operation between the two 64-bit results.
  16222. * * SMDS32: top*top - bottom*bottom
  16223. * * SMDRS32: bottom*bottom - top*top
  16224. * * SMXDS32: top*bottom - bottom*top
  16225. *
  16226. * **Description**:\n
  16227. * For the `SMDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  16228. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16229. * Rs1 with the top 32-bit element of Rs2.
  16230. * For the `SMDRS32` instruction, it multiplies the top 32-bit element of Rs1 with the top 32-bit
  16231. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  16232. * element of Rs1 with the bottom 32-bit element of Rs2.
  16233. * For the `SMXDS32` instruction, it multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  16234. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  16235. * Rs1 with the bottom 32-bit element of Rs2.
  16236. * The subtraction result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed
  16237. * integers.
  16238. *
  16239. * **Operations**:\n
  16240. * ~~~
  16241. * Rd = (Rs1.W[1] * Rs2.W[1]) - (Rs1.W[0] * Rs2.W[0]); // SMDS32
  16242. * Rd = (Rs1.W[0] * Rs2.W[0]) - (Rs1.W[1] * Rs2.W[1]); // SMDRS32
  16243. * Rd = (Rs1.W[1] * Rs2.W[0]) - (Rs1.W[0] * Rs2.W[1]); // SMXDS32
  16244. * ~~~
  16245. *
  16246. * \param [in] a unsigned long type of value stored in a
  16247. * \param [in] b unsigned long type of value stored in b
  16248. * \return value stored in long type
  16249. */
  16250. __STATIC_FORCEINLINE long __RV_SMXDS32(unsigned long a, unsigned long b)
  16251. {
  16252. long result;
  16253. __ASM volatile("smxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16254. return result;
  16255. }
  16256. /* ===== Inline Function End for 4.33.3. SMXDS32 ===== */
  16257. /* ===== Inline Function Start for 4.34. SMIN32 ===== */
  16258. /**
  16259. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  16260. * \brief SMIN32 (SIMD 32-bit Signed Minimum)
  16261. * \details
  16262. * **Type**: SIMD (RV64 Only)
  16263. *
  16264. * **Syntax**:\n
  16265. * ~~~
  16266. * SMIN32 Rd, Rs1, Rs2
  16267. * ~~~
  16268. *
  16269. * **Purpose**:\n
  16270. * Do 32-bit signed integer elements finding minimum operations simultaneously.
  16271. *
  16272. * **Description**:\n
  16273. * This instruction compares the 32-bit signed integer elements in Rs1 with the 32-bit
  16274. * signed integer elements in Rs2 and selects the lesser number of each pair. The selected
  16275. * results are written to Rd.
  16276. *
  16277. * **Operations**:\n
  16278. * ~~~
  16279. * Rd.W[x] = (Rs1.W[x] < Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  16280. * for RV64: x=1...0
  16281. * ~~~
  16282. *
  16283. * \param [in] a unsigned long type of value stored in a
  16284. * \param [in] b unsigned long type of value stored in b
  16285. * \return value stored in unsigned long type
  16286. */
  16287. __STATIC_FORCEINLINE unsigned long __RV_SMIN32(unsigned long a, unsigned long b)
  16288. {
  16289. unsigned long result;
  16290. __ASM volatile("smin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16291. return result;
  16292. }
  16293. /* ===== Inline Function End for 4.34. SMIN32 ===== */
  16294. /* ===== Inline Function Start for 4.35.1. SRA32 ===== */
  16295. /**
  16296. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16297. * \brief SRA32 (SIMD 32-bit Shift Right Arithmetic)
  16298. * \details
  16299. * **Type**: SIMD (RV64 Only)
  16300. *
  16301. * **Syntax**:\n
  16302. * ~~~
  16303. * SRA32 Rd, Rs1, Rs2
  16304. * SRA32.u Rd, Rs1, Rs2
  16305. * ~~~
  16306. *
  16307. * **Purpose**:\n
  16308. * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
  16309. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16310. * results.
  16311. *
  16312. * **Description**:\n
  16313. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the vacated
  16314. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  16315. * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  16316. * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
  16317. * And the results are written to Rd.
  16318. *
  16319. * **Operations**:\n
  16320. * ~~~
  16321. * sa = Rs2[4:0];
  16322. * if (sa > 0) {
  16323. * if (`.u` form) { // SRA32.u
  16324. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16325. * Rd.W[x] = res[31:0];
  16326. * } else { // SRA32
  16327. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  16328. * }
  16329. * } else {
  16330. * Rd = Rs1;
  16331. * }
  16332. * for RV64: x=1...0
  16333. * ~~~
  16334. *
  16335. * \param [in] a unsigned long type of value stored in a
  16336. * \param [in] b unsigned int type of value stored in b
  16337. * \return value stored in unsigned long type
  16338. */
  16339. __STATIC_FORCEINLINE unsigned long __RV_SRA32(unsigned long a, unsigned int b)
  16340. {
  16341. unsigned long result;
  16342. __ASM volatile("sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16343. return result;
  16344. }
  16345. /* ===== Inline Function End for 4.35.1. SRA32 ===== */
  16346. /* ===== Inline Function Start for 4.35.2. SRA32.u ===== */
  16347. /**
  16348. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16349. * \brief SRA32.u (SIMD 32-bit Rounding Shift Right Arithmetic)
  16350. * \details
  16351. * **Type**: SIMD (RV64 Only)
  16352. *
  16353. * **Syntax**:\n
  16354. * ~~~
  16355. * SRA32 Rd, Rs1, Rs2
  16356. * SRA32.u Rd, Rs1, Rs2
  16357. * ~~~
  16358. *
  16359. * **Purpose**:\n
  16360. * Do 32-bit element arithmetic right shift operations simultaneously. The shift amount is a
  16361. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16362. * results.
  16363. *
  16364. * **Description**:\n
  16365. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  16366. * bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order
  16367. * 5-bits of the value in the Rs2 register. For the rounding operation of the `.u` form, a value of 1 is
  16368. * added to the most significant discarded bit of each 32-bit data element to calculate the final results.
  16369. * And the results are written to Rd.
  16370. *
  16371. * **Operations**:\n
  16372. * ~~~
  16373. * sa = Rs2[4:0];
  16374. * if (sa > 0) {
  16375. * if (`.u` form) { // SRA32.u
  16376. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16377. * Rd.W[x] = res[31:0];
  * } else { // SRA32
  16379. * Rd.W[x] = SE32(Rs1.W[x][31:sa])
  16380. * }
  16381. * } else {
  16382. * Rd = Rs1;
  16383. * }
  16384. * for RV64: x=1...0
  16385. * ~~~
  16386. *
  16387. * \param [in] a unsigned long type of value stored in a
  16388. * \param [in] b unsigned int type of value stored in b
  16389. * \return value stored in unsigned long type
  16390. */
  16391. __STATIC_FORCEINLINE unsigned long __RV_SRA32_U(unsigned long a, unsigned int b)
  16392. {
  16393. unsigned long result;
  16394. __ASM volatile("sra32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16395. return result;
  16396. }
  16397. /* ===== Inline Function End for 4.35.2. SRA32.u ===== */
  16398. /* ===== Inline Function Start for 4.36.1. SRAI32 ===== */
  16399. /**
  16400. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16401. * \brief SRAI32 (SIMD 32-bit Shift Right Arithmetic Immediate)
  16402. * \details
  16403. * **Type**: DSP (RV64 Only)
  16404. *
  16405. * **Syntax**:\n
  16406. * ~~~
  16407. * SRAI32 Rd, Rs1, imm5u
  16408. * SRAI32.u Rd, Rs1, imm5u
  16409. * ~~~
  16410. *
  16411. * **Purpose**:\n
  16412. * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
  16413. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  16414. * results.
  16415. *
  16416. * **Description**:\n
  16417. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  16418. * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
  16419. * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  16420. * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
  16421. * to Rd.
  16422. *
  16423. * **Operations**:\n
  16424. * ~~~
  16425. * sa = imm5u[4:0];
  16426. * if (sa > 0) {
  16427. * if (`.u` form) { // SRAI32.u
  16428. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16429. * Rd.W[x] = res[31:0];
  * } else { // SRAI32
  16431. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  16432. * }
  16433. * } else {
  16434. * Rd = Rs1;
  16435. * }
  16436. * for RV64: x=1...0
  16437. * ~~~
  16438. *
  16439. * \param [in] a unsigned long type of value stored in a
  * \param [in] b imm5u shift amount (unsigned 5-bit immediate, 0..31); must be a compile-time constant
  16441. * \return value stored in unsigned long type
  16442. */
  #define __RV_SRAI32(a, b) \
      ({ \
          /* Bind the argument before declaring any other local, so that a */ \
          /* caller variable named `result` is not shadowed by a temporary. */ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long __rd; \
          /* %0 = Rd, %1 = Rs1, %2 = imm5u shift amount (constant operand) */ \
          __ASM volatile("srai32 %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b)); \
          __rd; \
      })
  16450. /* ===== Inline Function End for 4.36.1. SRAI32 ===== */
  16451. /* ===== Inline Function Start for 4.36.2. SRAI32.u ===== */
  16452. /**
  16453. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16454. * \brief SRAI32.u (SIMD 32-bit Rounding Shift Right Arithmetic Immediate)
  16455. * \details
  16456. * **Type**: DSP (RV64 Only)
  16457. *
  16458. * **Syntax**:\n
  16459. * ~~~
  16460. * SRAI32 Rd, Rs1, imm5u
  16461. * SRAI32.u Rd, Rs1, imm5u
  16462. * ~~~
  16463. *
  16464. * **Purpose**:\n
  16465. * Do 32-bit elements arithmetic right shift operations simultaneously. The shift amount is
  16466. * an immediate value. The `.u` form performs additional rounding up operations on the shifted
  16467. * results.
  16468. *
  16469. * **Description**:\n
  16470. * The 32-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out
  16471. * bits are filled with the sign-bit of the 32-bit data elements. The shift amount is specified by the
  16472. * imm5u constant. For the rounding operation of the `.u` form, a value of 1 is added to the most
  16473. * significant discarded bit of each 32-bit data to calculate the final results. And the results are written
  16474. * to Rd.
  16475. *
  16476. * **Operations**:\n
  16477. * ~~~
  16478. * sa = imm5u[4:0];
  16479. * if (sa > 0) {
  16480. * if (`.u` form) { // SRAI32.u
  16481. * res[31:-1] = SE33(Rs1.W[x][31:sa-1]) + 1;
  16482. * Rd.W[x] = res[31:0];
  * } else { // SRAI32
  16484. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  16485. * }
  16486. * } else {
  16487. * Rd = Rs1;
  16488. * }
  16489. * for RV64: x=1...0
  16490. * ~~~
  16491. *
  16492. * \param [in] a unsigned long type of value stored in a
  * \param [in] b imm5u shift amount (unsigned 5-bit immediate, 0..31); must be a compile-time constant
  16494. * \return value stored in unsigned long type
  16495. */
  #define __RV_SRAI32_U(a, b) \
      ({ \
          /* Bind the argument before declaring any other local, so that a */ \
          /* caller variable named `result` is not shadowed by a temporary. */ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long __rd; \
          /* %0 = Rd, %1 = Rs1, %2 = imm5u shift amount (constant operand) */ \
          __ASM volatile("srai32.u %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b)); \
          __rd; \
      })
  16503. /* ===== Inline Function End for 4.36.2. SRAI32.u ===== */
  16504. /* ===== Inline Function Start for 4.37. SRAIW.u ===== */
  16505. /**
  16506. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_NON_SIMD_32B_SHIFT
  16507. * \brief SRAIW.u (Rounding Shift Right Arithmetic Immediate Word)
  16508. * \details
  16509. * **Type**: DSP (RV64 only)
  16510. *
  16511. * **Syntax**:\n
  16512. * ~~~
  16513. * SRAIW.u Rd, Rs1, imm5u
  16514. * ~~~
  16515. *
  16516. * **Purpose**:\n
  16517. * Perform a 32-bit arithmetic right shift operation with rounding. The shift amount is an
  16518. * immediate value.
  16519. *
  16520. * **Description**:\n
  16521. * This instruction right-shifts the lower 32-bit content of Rs1 arithmetically. The shifted
  16522. * out bits are filled with the sign-bit Rs1(31) and the shift amount is specified by the imm5u constant.
  16523. * For the rounding operation, a value of 1 is added to the most significant discarded bit of the data to
  16524. * calculate the final result. And the result is sign-extended and written to Rd.
  16525. *
  16526. * **Operations**:\n
  16527. * ~~~
  16528. * sa = imm5u;
  16529. * if (sa != 0) {
  16530. * res[31:-1] = SE33(Rs1[31:(sa-1)]) + 1;
  16531. * Rd = SE32(res[31:0]);
  16532. * } else {
  16533. * Rd = SE32(Rs1.W[0]);
  16534. * }
  16535. * ~~~
  16536. *
  16537. * \param [in] a int type of value stored in a
  * \param [in] b imm5u shift amount (unsigned 5-bit immediate, 0..31); must be a compile-time constant
  16539. * \return value stored in long type
  16540. */
  #define __RV_SRAIW_U(a, b) \
      ({ \
          /* Bind the argument before declaring any other local, so that a */ \
          /* caller variable named `result` is not shadowed by a temporary. */ \
          int __a = (int)(a); \
          long __rd; \
          /* %0 = Rd, %1 = Rs1 (32-bit source), %2 = imm5u shift amount (constant operand) */ \
          __ASM volatile("sraiw.u %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b)); \
          __rd; \
      })
  16548. /* ===== Inline Function End for 4.37. SRAIW.u ===== */
  16549. /* ===== Inline Function Start for 4.38.1. SRL32 ===== */
  16550. /**
  16551. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16552. * \brief SRL32 (SIMD 32-bit Shift Right Logical)
  16553. * \details
  16554. * **Type**: SIMD (RV64 Only)
  16555. *
  16556. * **Syntax**:\n
  16557. * ~~~
  16558. * SRL32 Rd, Rs1, Rs2
  16559. * SRL32.u Rd, Rs1, Rs2
  16560. * ~~~
  16561. *
  16562. * **Purpose**:\n
  16563. * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
  16564. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16565. * results.
  16566. *
  16567. * **Description**:\n
  16568. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16569. * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
  16570. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  16571. * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
  16572. * Rd.
  16573. *
  16574. * **Operations**:\n
  16575. * ~~~
  16576. * sa = Rs2[4:0];
  16577. * if (sa > 0) {
  * if (`.u` form) { // SRL32.u
  * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  * Rd.W[x] = res[31:0];
  * } else { // SRL32
  16582. * Rd.W[x] = ZE32(Rs1.W[x][31:sa])
  16583. * }
  16584. * } else {
  16585. * Rd = Rs1;
  16586. * }
  16587. * for RV64: x=1...0
  16588. * ~~~
  16589. *
  16590. * \param [in] a unsigned long type of value stored in a
  16591. * \param [in] b unsigned int type of value stored in b
  16592. * \return value stored in unsigned long type
  16593. */
  16594. __STATIC_FORCEINLINE unsigned long __RV_SRL32(unsigned long a, unsigned int b)
  16595. {
  16596. unsigned long result;
  16597. __ASM volatile("srl32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16598. return result;
  16599. }
  16600. /* ===== Inline Function End for 4.38.1. SRL32 ===== */
  16601. /* ===== Inline Function Start for 4.38.2. SRL32.u ===== */
  16602. /**
  16603. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16604. * \brief SRL32.u (SIMD 32-bit Rounding Shift Right Logical)
  16605. * \details
  16606. * **Type**: SIMD (RV64 Only)
  16607. *
  16608. * **Syntax**:\n
  16609. * ~~~
  16610. * SRL32 Rd, Rs1, Rs2
  16611. * SRL32.u Rd, Rs1, Rs2
  16612. * ~~~
  16613. *
  16614. * **Purpose**:\n
  16615. * Do 32-bit element logical right shift operations simultaneously. The shift amount is a
  16616. * variable from a GPR. The `.u` form performs additional rounding up operations on the shifted
  16617. * results.
  16618. *
  16619. * **Description**:\n
  16620. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16621. * are filled with zero. The shift amount is specified by the low-order 5-bits of the value in the Rs2
  16622. * register. For the rounding operation of the `.u` form, a value of 1 is added to the most significant
  16623. * discarded bit of each 32-bit data element to calculate the final results. And the results are written to
  16624. * Rd.
  16625. *
  16626. * **Operations**:\n
  16627. * ~~~
  16628. * sa = Rs2[4:0];
  16629. * if (sa > 0) {
  * if (`.u` form) { // SRL32.u
  * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  * Rd.W[x] = res[31:0];
  * } else { // SRL32
  16634. * Rd.W[x] = ZE32(Rs1.W[x][31:sa])
  16635. * }
  16636. * } else {
  16637. * Rd = Rs1;
  16638. * }
  16639. * for RV64: x=1...0
  16640. * ~~~
  16641. *
  16642. * \param [in] a unsigned long type of value stored in a
  16643. * \param [in] b unsigned int type of value stored in b
  16644. * \return value stored in unsigned long type
  16645. */
  16646. __STATIC_FORCEINLINE unsigned long __RV_SRL32_U(unsigned long a, unsigned int b)
  16647. {
  16648. unsigned long result;
  16649. __ASM volatile("srl32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16650. return result;
  16651. }
  16652. /* ===== Inline Function End for 4.38.2. SRL32.u ===== */
  16653. /* ===== Inline Function Start for 4.39.1. SRLI32 ===== */
  16654. /**
  16655. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16656. * \brief SRLI32 (SIMD 32-bit Shift Right Logical Immediate)
  16657. * \details
  16658. * **Type**: SIMD (RV64 Only)
  16659. *
  16660. * **Syntax**:\n
  16661. * ~~~
  16662. * SRLI32 Rd, Rs1, imm5u
  16663. * SRLI32.u Rd, Rs1, imm5u
  16664. * ~~~
  16665. *
  16666. * **Purpose**:\n
  16667. * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
  16668. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  16669. *
  16670. * **Description**:\n
  16671. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16672. * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
  16673. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
  16674. * data to calculate the final results. And the results are written to Rd.
  16675. *
  16676. * **Operations**:\n
  16677. * ~~~
  16678. * sa = imm5u[4:0];
  16679. * if (sa > 0) {
  16680. * if (`.u` form) { // SRLI32.u
  16681. * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  16682. * Rd.W[x] = res[31:0];
  * } else { // SRLI32
  16684. * Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
  16685. * }
  16686. * } else {
  16687. * Rd = Rs1;
  16688. * }
  16689. * for RV64: x=1...0
  16690. * ~~~
  16691. *
  16692. * \param [in] a unsigned long type of value stored in a
  * \param [in] b imm5u shift amount (unsigned 5-bit immediate, 0..31); must be a compile-time constant
  16694. * \return value stored in unsigned long type
  16695. */
  #define __RV_SRLI32(a, b) \
      ({ \
          /* Bind the argument before declaring any other local, so that a */ \
          /* caller variable named `result` is not shadowed by a temporary. */ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long __rd; \
          /* %0 = Rd, %1 = Rs1, %2 = imm5u shift amount (constant operand) */ \
          __ASM volatile("srli32 %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b)); \
          __rd; \
      })
  16703. /* ===== Inline Function End for 4.39.1. SRLI32 ===== */
  16704. /* ===== Inline Function Start for 4.39.2. SRLI32.u ===== */
  16705. /**
  16706. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_SHIFT
  16707. * \brief SRLI32.u (SIMD 32-bit Rounding Shift Right Logical Immediate)
  16708. * \details
  16709. * **Type**: SIMD (RV64 Only)
  16710. *
  16711. * **Syntax**:\n
  16712. * ~~~
  16713. * SRLI32 Rd, Rs1, imm5u
  16714. * SRLI32.u Rd, Rs1, imm5u
  16715. * ~~~
  16716. *
  16717. * **Purpose**:\n
  16718. * Do 32-bit elements logical right shift operations simultaneously. The shift amount is an
  16719. * immediate value. The `.u` form performs additional rounding up operations on the shifted results.
  16720. *
  16721. * **Description**:\n
  16722. * The 32-bit data elements in Rs1 are right-shifted logically, that is, the shifted out bits
  16723. * are filled with zero. The shift amount is specified by the imm5u constant. For the rounding
  16724. * operation of the `.u` form, a value of 1 is added to the most significant discarded bit of each 32-bit
  16725. * data to calculate the final results. And the results are written to Rd.
  16726. *
  16727. * **Operations**:\n
  16728. * ~~~
  16729. * sa = imm5u[4:0];
  16730. * if (sa > 0) {
  16731. * if (`.u` form) { // SRLI32.u
  16732. * res[31:-1] = ZE33(Rs1.W[x][31:sa-1]) + 1;
  16733. * Rd.W[x] = res[31:0];
  * } else { // SRLI32
  16735. * Rd.W[x] = ZE32(Rs1.W[x][31:sa]);
  16736. * }
  16737. * } else {
  16738. * Rd = Rs1;
  16739. * }
  16740. * for RV64: x=1...0
  16741. * ~~~
  16742. *
  16743. * \param [in] a unsigned long type of value stored in a
  * \param [in] b imm5u shift amount (unsigned 5-bit immediate, 0..31); must be a compile-time constant
  16745. * \return value stored in unsigned long type
  16746. */
  #define __RV_SRLI32_U(a, b) \
      ({ \
          /* Bind the argument before declaring any other local, so that a */ \
          /* caller variable named `result` is not shadowed by a temporary. */ \
          unsigned long __a = (unsigned long)(a); \
          unsigned long __rd; \
          /* %0 = Rd, %1 = Rs1, %2 = imm5u shift amount (constant operand) */ \
          __ASM volatile("srli32.u %0, %1, %2" : "=r"(__rd) : "r"(__a), "K"(b)); \
          __rd; \
      })
  16754. /* ===== Inline Function End for 4.39.2. SRLI32.u ===== */
  16755. /* ===== Inline Function Start for 4.40. STAS32 ===== */
  16756. /**
  16757. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16758. * \brief STAS32 (SIMD 32-bit Straight Addition & Subtraction)
  16759. * \details
  16760. * **Type**: SIMD (RV64 Only)
  16761. *
  16762. * **Syntax**:\n
  16763. * ~~~
  16764. * STAS32 Rd, Rs1, Rs2
  16765. * ~~~
  16766. *
  16767. * **Purpose**:\n
  16768. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit
  16769. * chunk simultaneously. Operands are from corresponding 32-bit elements.
  16770. *
  16771. * **Description**:\n
  16772. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit
  16773. * integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts
  16774. * the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and
  16775. * writes the result to [31:0] of Rd.
  16776. *
  16777. * **Note**:\n
  16778. * This instruction can be used for either signed or unsigned operations.
  16779. *
  16780. * **Operations**:\n
  16781. * ~~~
  16782. * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
  16783. * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
  16784. * ~~~
  16785. *
  16786. * \param [in] a unsigned long type of value stored in a
  16787. * \param [in] b unsigned long type of value stored in b
  16788. * \return value stored in unsigned long type
  16789. */
  16790. __STATIC_FORCEINLINE unsigned long __RV_STAS32(unsigned long a, unsigned long b)
  16791. {
  16792. unsigned long result;
  16793. __ASM volatile("stas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16794. return result;
  16795. }
  16796. /* ===== Inline Function End for 4.40. STAS32 ===== */
  16797. /* ===== Inline Function Start for 4.41. STSA32 ===== */
  16798. /**
  16799. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16800. * \brief STSA32 (SIMD 32-bit Straight Subtraction & Addition)
  16801. * \details
  16802. * **Type**: SIMD (RV64 Only)
  16803. *
  16804. * **Syntax**:\n
  16805. * ~~~
  16806. * STSA32 Rd, Rs1, Rs2
  16807. * ~~~
  16808. *
  16809. * **Purpose**:\n
  16810. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit
  16811. * chunk simultaneously. Operands are from corresponding 32-bit elements.
  *
  * **Description**:\n
  16813. * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer
  16814. * element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit
  16815. * integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result
  16816. * to [31:0] of Rd
  16817. *
  16818. * **Note**:\n
  16819. * This instruction can be used for either signed or unsigned operations.
  16820. *
  16821. * **Operations**:\n
  16822. * ~~~
  16823. * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
  16824. * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
  16825. * ~~~
  16826. *
  16827. * \param [in] a unsigned long type of value stored in a
  16828. * \param [in] b unsigned long type of value stored in b
  16829. * \return value stored in unsigned long type
  16830. */
  16831. __STATIC_FORCEINLINE unsigned long __RV_STSA32(unsigned long a, unsigned long b)
  16832. {
  16833. unsigned long result;
  16834. __ASM volatile("stsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16835. return result;
  16836. }
  16837. /* ===== Inline Function End for 4.41. STSA32 ===== */
  16838. /* ===== Inline Function Start for 4.42. SUB32 ===== */
  16839. /**
  16840. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16841. * \brief SUB32 (SIMD 32-bit Subtraction)
  16842. * \details
  16843. * **Type**: DSP (RV64 Only)
  16844. *
  16845. * **Syntax**:\n
  16846. * ~~~
  16847. * SUB32 Rd, Rs1, Rs2
  16848. * ~~~
  16849. *
  16850. * **Purpose**:\n
  16851. * Do 32-bit integer element subtractions simultaneously.
  16852. *
  16853. * **Description**:\n
  16854. * This instruction subtracts the 32-bit integer elements in Rs2 from the 32-bit integer
  16855. * elements in Rs1, and then writes the results to Rd.
  16856. *
  16857. * **Note**:\n
  16858. * This instruction can be used for either signed or unsigned subtraction.
  16859. *
  16860. * **Operations**:\n
  16861. * ~~~
  16862. * Rd.W[x] = Rs1.W[x] - Rs2.W[x];
  16863. * for RV64: x=1...0
  16864. * ~~~
  16865. *
  16866. * \param [in] a unsigned long type of value stored in a
  16867. * \param [in] b unsigned long type of value stored in b
  16868. * \return value stored in unsigned long type
  16869. */
  16870. __STATIC_FORCEINLINE unsigned long __RV_SUB32(unsigned long a, unsigned long b)
  16871. {
  16872. unsigned long result;
  16873. __ASM volatile("sub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16874. return result;
  16875. }
  16876. /* ===== Inline Function End for 4.42. SUB32 ===== */
  16877. /* ===== Inline Function Start for 4.43. UKADD32 ===== */
  16878. /**
  16879. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16880. * \brief UKADD32 (SIMD 32-bit Unsigned Saturating Addition)
  16881. * \details
  16882. * **Type**: SIMD (RV64 Only)
  16883. *
  16884. * **Syntax**:\n
  16885. * ~~~
  16886. * UKADD32 Rd, Rs1, Rs2
  16887. * ~~~
  16888. *
  16889. * **Purpose**:\n
  16890. * Do 32-bit unsigned integer element saturating additions simultaneously.
  16891. *
  16892. * **Description**:\n
  16893. * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
  16894. * unsigned integer elements in Rs2. If any of the results are beyond the 32-bit unsigned number
  16895. * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
  16896. * results are written to Rd.
  16897. *
  16898. * **Operations**:\n
  16899. * ~~~
  16900. * res[x] = Rs1.W[x] + Rs2.W[x];
  16901. * if (res[x] > (2^32)-1) {
  16902. * res[x] = (2^32)-1;
  16903. * OV = 1;
  16904. * }
  16905. * Rd.W[x] = res[x];
  16906. * for RV64: x=1...0
  16907. * ~~~
  16908. *
  16909. * \param [in] a unsigned long type of value stored in a
  16910. * \param [in] b unsigned long type of value stored in b
  16911. * \return value stored in unsigned long type
  16912. */
  16913. __STATIC_FORCEINLINE unsigned long __RV_UKADD32(unsigned long a, unsigned long b)
  16914. {
  16915. unsigned long result;
  16916. __ASM volatile("ukadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16917. return result;
  16918. }
  16919. /* ===== Inline Function End for 4.43. UKADD32 ===== */
  16920. /* ===== Inline Function Start for 4.44. UKCRAS32 ===== */
  16921. /**
  16922. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16923. * \brief UKCRAS32 (SIMD 32-bit Unsigned Saturating Cross Addition & Subtraction)
  16924. * \details
  16925. * **Type**: SIMD (RV64 Only)
  16926. *
  16927. * **Syntax**:\n
  16928. * ~~~
  16929. * UKCRAS32 Rd, Rs1, Rs2
  16930. * ~~~
  16931. *
  16932. * **Purpose**:\n
  16933. * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
  16934. * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed
  16935. * 32-bit elements.
  16936. *
  16937. * **Description**:\n
  16938. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
  16939. * bit unsigned integer element in [31:0] of Rs2; at the same time, it subtracts the 32-bit unsigned
  16940. * integer element in [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
  16941. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  16942. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
  16943. * [31:0] of Rd for subtraction.
  16944. *
  16945. * **Operations**:\n
  16946. * ~~~
  16947. * res1 = Rs1.W[1] + Rs2.W[0];
  16948. * res2 = Rs1.W[0] - Rs2.W[1];
  16949. * if (res1 > (2^32)-1) {
  16950. * res1 = (2^32)-1;
  16951. * OV = 1;
  16952. * }
  16953. * if (res2 < 0) {
  16954. * res2 = 0;
  16955. * OV = 1;
  16956. * }
  16957. * Rd.W[1] = res1;
  16958. * Rd.W[0] = res2;
  16959. * ~~~
  16960. *
  16961. * \param [in] a unsigned long type of value stored in a
  16962. * \param [in] b unsigned long type of value stored in b
  16963. * \return value stored in unsigned long type
  16964. */
  16965. __STATIC_FORCEINLINE unsigned long __RV_UKCRAS32(unsigned long a, unsigned long b)
  16966. {
  16967. unsigned long result;
  16968. __ASM volatile("ukcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  16969. return result;
  16970. }
  16971. /* ===== Inline Function End for 4.44. UKCRAS32 ===== */
  16972. /* ===== Inline Function Start for 4.45. UKCRSA32 ===== */
  16973. /**
  16974. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  16975. * \brief UKCRSA32 (SIMD 32-bit Unsigned Saturating Cross Subtraction & Addition)
  16976. * \details
  16977. * **Type**: SIMD (RV64 Only)
  16978. *
  16979. * **Syntax**:\n
  16980. * ~~~
  16981. * UKCRSA32 Rd, Rs1, Rs2
  16982. * ~~~
  16983. *
  16984. * **Purpose**:\n
  16985. * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
  16986. * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed
  16987. * 32-bit elements.
  16988. *
  16989. * **Description**:\n
  16990. * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
  16991. * 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
  16992. * integer element in [63:32] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
  16993. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  16994. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
  16995. * [31:0] of Rd for addition.
  16996. *
  16997. * **Operations**:\n
  16998. * ~~~
  16999. * res1 = Rs1.W[1] - Rs2.W[0];
  17000. * res2 = Rs1.W[0] + Rs2.W[1];
  * if (res1 < 0) {
  * res1 = 0;
  * OV = 1;
  * }
  * if (res2 > (2^32)-1) {
  * res2 = (2^32)-1;
  * OV = 1;
  * }
  17008. * Rd.W[1] = res1;
  17009. * Rd.W[0] = res2;
  17010. * ~~~
  17011. *
  17012. * \param [in] a unsigned long type of value stored in a
  17013. * \param [in] b unsigned long type of value stored in b
  17014. * \return value stored in unsigned long type
  17015. */
  17016. __STATIC_FORCEINLINE unsigned long __RV_UKCRSA32(unsigned long a, unsigned long b)
  17017. {
  17018. unsigned long result;
  17019. __ASM volatile("ukcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17020. return result;
  17021. }
  17022. /* ===== Inline Function End for 4.45. UKCRSA32 ===== */
  17023. /* ===== Inline Function Start for 4.46. UKSTAS32 ===== */
  17024. /**
  17025. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17026. * \brief UKSTAS32 (SIMD 32-bit Unsigned Saturating Straight Addition & Subtraction)
  17027. * \details
  17028. * **Type**: SIMD (RV64 Only)
  17029. *
  17030. * **Syntax**:\n
  17031. * ~~~
  17032. * UKSTAS32 Rd, Rs1, Rs2
  17033. * ~~~
  17034. *
  17035. * **Purpose**:\n
  17036. * Do one 32-bit unsigned integer element saturating addition and one 32-bit unsigned
  17037. * integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from
  17038. * corresponding 32-bit elements.
  17039. *
  17040. * **Description**:\n
  17041. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the 32-
  17042. * bit unsigned integer element in [63:32] of Rs2; at the same time, it subtracts the 32-bit unsigned
  17043. * integer element in [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] Rs1. If any of the
  17044. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  17045. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and
  17046. * [31:0] of Rd for subtraction.
  17047. *
  17048. * **Operations**:\n
  17049. * ~~~
  17050. * res1 = Rs1.W[1] + Rs2.W[1];
  17051. * res2 = Rs1.W[0] - Rs2.W[0];
  17052. * if (res1 > (2^32)-1) {
  17053. * res1 = (2^32)-1;
  17054. * OV = 1;
  17055. * }
  17056. * if (res2 < 0) {
  17057. * res2 = 0;
  17058. * OV = 1;
  17059. * }
  17060. * Rd.W[1] = res1;
  17061. * Rd.W[0] = res2;
  17062. * ~~~
  17063. *
  17064. * \param [in] a unsigned long type of value stored in a
  17065. * \param [in] b unsigned long type of value stored in b
  17066. * \return value stored in unsigned long type
  17067. */
  17068. __STATIC_FORCEINLINE unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b)
  17069. {
  17070. unsigned long result;
  17071. __ASM volatile("ukstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17072. return result;
  17073. }
  17074. /* ===== Inline Function End for 4.46. UKSTAS32 ===== */
  17075. /* ===== Inline Function Start for 4.47. UKSTSA32 ===== */
  17076. /**
  17077. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17078. * \brief UKSTSA32 (SIMD 32-bit Unsigned Saturating Straight Subtraction & Addition)
  17079. * \details
  17080. * **Type**: SIMD (RV64 Only)
  17081. *
  17082. * **Syntax**:\n
  17083. * ~~~
  17084. * UKSTSA32 Rd, Rs1, Rs2
  17085. * ~~~
  17086. *
  17087. * **Purpose**:\n
  17088. * Do one 32-bit unsigned integer element saturating subtraction and one 32-bit unsigned
  17089. * integer element saturating addition in a 64-bit chunk simultaneously. Operands are from
  17090. * corresponding 32-bit elements.
  17091. *
  17092. * **Description**:\n
  17093. * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
  17094. * the 32-bit unsigned integer element in [63:32] of Rs1; at the same time, it adds the 32-bit unsigned
  17095. * integer element in [31:0] of Rs2 with the 32-bit unsigned integer element in [31:0] Rs1. If any of the
  17096. * results are beyond the 32-bit unsigned number range (0 <= RES <= 2^32-1), they are saturated to the
  17097. * range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and
  17098. * [31:0] of Rd for addition.
  17099. *
  17100. * **Operations**:\n
  17101. * ~~~
  17102. * res1 = Rs1.W[1] - Rs2.W[1];
  17103. * res2 = Rs1.W[0] + Rs2.W[0];
  * if (res1 < 0) {
  * res1 = 0;
  * OV = 1;
  * }
  * if (res2 > (2^32)-1) {
  * res2 = (2^32)-1;
  * OV = 1;
  * }
  17111. * Rd.W[1] = res1;
  17112. * Rd.W[0] = res2;
  17113. * ~~~
  17114. *
  17115. * \param [in] a unsigned long type of value stored in a
  17116. * \param [in] b unsigned long type of value stored in b
  17117. * \return value stored in unsigned long type
  17118. */
  17119. __STATIC_FORCEINLINE unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b)
  17120. {
  17121. unsigned long result;
  17122. __ASM volatile("ukstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17123. return result;
  17124. }
  17125. /* ===== Inline Function End for 4.47. UKSTSA32 ===== */
  17126. /* ===== Inline Function Start for 4.48. UKSUB32 ===== */
  17127. /**
  17128. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17129. * \brief UKSUB32 (SIMD 32-bit Unsigned Saturating Subtraction)
  17130. * \details
  17131. * **Type**: SIMD (RV64 Only)
  17132. *
  17133. * **Syntax**:\n
  17134. * ~~~
  17135. * UKSUB32 Rd, Rs1, Rs2
  17136. * ~~~
  17137. *
  17138. * **Purpose**:\n
  17139. * Do 32-bit unsigned integer elements saturating subtractions simultaneously.
  17140. *
  17141. * **Description**:\n
  17142. * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
  17143. * unsigned integer elements in Rs1. If any of the results are beyond the 32-bit unsigned number
  17144. * range (0 <= RES <= 2^32-1), they are saturated to the range and the OV bit is set to 1. The saturated
  17145. * results are written to Rd.
  17146. *
  17147. * **Operations**:\n
  17148. * ~~~
  17149. * res[x] = Rs1.W[x] - Rs2.W[x];
  17150. * if (res[x] < 0) {
  17151. * res[x] = 0;
  17152. * OV = 1;
  17153. * }
  17154. * Rd.W[x] = res[x];
  17155. * for RV64: x=1...0
  17156. * ~~~
  17157. *
  17158. * \param [in] a unsigned long type of value stored in a
  17159. * \param [in] b unsigned long type of value stored in b
  17160. * \return value stored in unsigned long type
  17161. */
  17162. __STATIC_FORCEINLINE unsigned long __RV_UKSUB32(unsigned long a, unsigned long b)
  17163. {
  17164. unsigned long result;
  17165. __ASM volatile("uksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17166. return result;
  17167. }
  17168. /* ===== Inline Function End for 4.48. UKSUB32 ===== */
  17169. /* ===== Inline Function Start for 4.49. UMAX32 ===== */
  17170. /**
  17171. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  17172. * \brief UMAX32 (SIMD 32-bit Unsigned Maximum)
  17173. * \details
  17174. * **Type**: SIMD (RV64 Only)
  17175. *
  17176. * **Syntax**:\n
  17177. * ~~~
  17178. * UMAX32 Rd, Rs1, Rs2
  17179. * ~~~
  17180. *
  17181. * **Purpose**:\n
  17182. * Do 32-bit unsigned integer elements finding maximum operations simultaneously.
  17183. *
  17184. * **Description**:\n
  17185. * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
  17186. * unsigned integer elements in Rs2 and, for each pair, selects the number that is greater than the other one. The
  17187. * selected results are written to Rd.
  17188. *
  17189. * **Operations**:\n
  17190. * ~~~
  17191. * Rd.W[x] = (Rs1.W[x] u> Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  17192. * for RV64: x=1...0
  17193. * ~~~
  17194. *
  17195. * \param [in] a unsigned long type of value stored in a
  17196. * \param [in] b unsigned long type of value stored in b
  17197. * \return value stored in unsigned long type
  17198. */
  17199. __STATIC_FORCEINLINE unsigned long __RV_UMAX32(unsigned long a, unsigned long b)
  17200. {
  17201. unsigned long result;
  17202. __ASM volatile("umax32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17203. return result;
  17204. }
  17205. /* ===== Inline Function End for 4.49. UMAX32 ===== */
  17206. /* ===== Inline Function Start for 4.50. UMIN32 ===== */
  17207. /**
  17208. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_MISC
  17209. * \brief UMIN32 (SIMD 32-bit Unsigned Minimum)
  17210. * \details
  17211. * **Type**: SIMD (RV64 Only)
  17212. *
  17213. * **Syntax**:\n
  17214. * ~~~
  17215. * UMIN32 Rd, Rs1, Rs2
  17216. * ~~~
  17217. *
  17218. * **Purpose**:\n
  17219. * Do 32-bit unsigned integer elements finding minimum operations simultaneously.
  17220. *
  17221. * **Description**:\n
  17222. * This instruction compares the 32-bit unsigned integer elements in Rs1 with the 32-bit
  17223. * unsigned integer elements in Rs2 and, for each pair, selects the number that is less than the other one. The
  17224. * selected results are written to Rd.
  17225. *
  17226. * **Operations**:\n
  17227. * ~~~
  17228. * Rd.W[x] = (Rs1.W[x] <u Rs2.W[x])? Rs1.W[x] : Rs2.W[x];
  17229. * for RV64: x=1...0
  17230. * ~~~
  17231. *
  17232. * \param [in] a unsigned long type of value stored in a
  17233. * \param [in] b unsigned long type of value stored in b
  17234. * \return value stored in unsigned long type
  17235. */
  17236. __STATIC_FORCEINLINE unsigned long __RV_UMIN32(unsigned long a, unsigned long b)
  17237. {
  17238. unsigned long result;
  17239. __ASM volatile("umin32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17240. return result;
  17241. }
  17242. /* ===== Inline Function End for 4.50. UMIN32 ===== */
  17243. /* ===== Inline Function Start for 4.51. URADD32 ===== */
  17244. /**
  17245. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17246. * \brief URADD32 (SIMD 32-bit Unsigned Halving Addition)
  17247. * \details
  17248. * **Type**: SIMD (RV64 Only)
  17249. *
  17250. * **Syntax**:\n
  17251. * ~~~
  17252. * URADD32 Rd, Rs1, Rs2
  17253. * ~~~
  17254. *
  17255. * **Purpose**:\n
  17256. * Do 32-bit unsigned integer element additions simultaneously. The results are halved to
  17257. * avoid overflow or saturation.
  17258. *
  17259. * **Description**:\n
  17260. * This instruction adds the 32-bit unsigned integer elements in Rs1 with the 32-bit
  17261. * unsigned integer elements in Rs2. The results are first logically right-shifted by 1 bit and then
  17262. * written to Rd.
  17263. *
  17264. * **Examples**:\n
  17265. * ~~~
  17266. * * Ra = 0x7FFFFFFF, Rb = 0x7FFFFFFF, Rt = 0x7FFFFFFF
  17267. * * Ra = 0x80000000, Rb = 0x80000000, Rt = 0x80000000
  17268. * * Ra = 0x40000000, Rb = 0x80000000, Rt = 0x60000000
  17269. * ~~~
  17270. *
  17271. * **Operations**:\n
  17272. * ~~~
  17273. * Rd.W[x] = (Rs1.W[x] + Rs2.W[x]) u>> 1;
  17274. * for RV64: x=1...0
  17275. * ~~~
  17276. *
  17277. * \param [in] a unsigned long type of value stored in a
  17278. * \param [in] b unsigned long type of value stored in b
  17279. * \return value stored in unsigned long type
  17280. */
  17281. __STATIC_FORCEINLINE unsigned long __RV_URADD32(unsigned long a, unsigned long b)
  17282. {
  17283. unsigned long result;
  17284. __ASM volatile("uradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17285. return result;
  17286. }
  17287. /* ===== Inline Function End for 4.51. URADD32 ===== */
  17288. /* ===== Inline Function Start for 4.52. URCRAS32 ===== */
  17289. /**
  17290. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17291. * \brief URCRAS32 (SIMD 32-bit Unsigned Halving Cross Addition & Subtraction)
  17292. * \details
  17293. * **Type**: SIMD (RV64 Only)
  17294. *
  17295. * **Syntax**:\n
  17296. * ~~~
  17297. * URCRAS32 Rd, Rs1, Rs2
  17298. * ~~~
  17299. *
  17300. * **Purpose**:\n
  17301. * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
  17302. * subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The
  17303. * results are halved to avoid overflow or saturation.
  17304. *
  17305. * **Description**:\n
  17306. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the
  17307. * 32-bit unsigned integer element in [31:0] of Rs2, and subtracts the 32-bit unsigned integer element in
  17308. * [63:32] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
  17309. * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
  17310. * subtraction.
  17311. *
  17312. * **Examples**:\n
  17313. * ~~~
  17314. * Please see `URADD32` and `URSUB32` instructions.
  17315. * ~~~
  17316. *
  17317. * **Operations**:\n
  17318. * ~~~
  17319. * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) u>> 1;
  17320. * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) u>> 1;
  17321. * ~~~
  17322. *
  17323. * \param [in] a unsigned long type of value stored in a
  17324. * \param [in] b unsigned long type of value stored in b
  17325. * \return value stored in unsigned long type
  17326. */
  17327. __STATIC_FORCEINLINE unsigned long __RV_URCRAS32(unsigned long a, unsigned long b)
  17328. {
  17329. unsigned long result;
  17330. __ASM volatile("urcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17331. return result;
  17332. }
  17333. /* ===== Inline Function End for 4.52. URCRAS32 ===== */
  17334. /* ===== Inline Function Start for 4.53. URCRSA32 ===== */
  17335. /**
  17336. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17337. * \brief URCRSA32 (SIMD 32-bit Unsigned Halving Cross Subtraction & Addition)
  17338. * \details
  17339. * **Type**: SIMD (RV64 Only)
  17340. *
  17341. * **Syntax**:\n
  17342. * ~~~
  17343. * URCRSA32 Rd, Rs1, Rs2
  17344. * ~~~
  17345. *
  17346. * **Purpose**:\n
  17347. * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
  17348. * addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results
  17349. * are halved to avoid overflow or saturation.
  17350. *
  17351. * **Description**:\n
  17352. * This instruction subtracts the 32-bit unsigned integer element in [31:0] of Rs2 from the
  17353. * 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element in
  17354. * [31:0] of Rs1 with the 32-bit unsigned integer element in [63:32] of Rs2. The two results are first
  17355. * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
  17356. * addition.
  17357. *
  17358. * **Examples**:\n
  17359. * ~~~
  17360. * Please see `URADD32` and `URSUB32` instructions.
  17361. * ~~~
  17362. *
  17363. * **Operations**:\n
  17364. * ~~~
  17365. * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) u>> 1;
  17366. * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) u>> 1;
  17367. * ~~~
  17368. *
  17369. * \param [in] a unsigned long type of value stored in a
  17370. * \param [in] b unsigned long type of value stored in b
  17371. * \return value stored in unsigned long type
  17372. */
  17373. __STATIC_FORCEINLINE unsigned long __RV_URCRSA32(unsigned long a, unsigned long b)
  17374. {
  17375. unsigned long result;
  17376. __ASM volatile("urcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17377. return result;
  17378. }
  17379. /* ===== Inline Function End for 4.53. URCRSA32 ===== */
  17380. /* ===== Inline Function Start for 4.54. URSTAS32 ===== */
  17381. /**
  17382. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17383. * \brief URSTAS32 (SIMD 32-bit Unsigned Halving Straight Addition & Subtraction)
  17384. * \details
  17385. * **Type**: SIMD (RV64 Only)
  17386. *
  17387. * **Syntax**:\n
  17388. * ~~~
  17389. * URSTAS32 Rd, Rs1, Rs2
  17390. * ~~~
  17391. *
  17392. * **Purpose**:\n
  17393. * Do 32-bit unsigned integer element addition and 32-bit unsigned integer element
  17394. * subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.
  17395. * The results are halved to avoid overflow or saturation.
  17396. *
  17397. * **Description**:\n
  17398. * This instruction adds the 32-bit unsigned integer element in [63:32] of Rs1 with the
  17399. * 32-bit unsigned integer element in [63:32] of Rs2, and subtracts the 32-bit unsigned integer element in
  17400. * [31:0] of Rs2 from the 32-bit unsigned integer element in [31:0] of Rs1. The element results are first
  17401. * logically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for
  17402. * subtraction.
  17403. *
  17404. * **Examples**:\n
  17405. * ~~~
  17406. * Please see `URADD32` and `URSUB32` instructions.
  17407. * ~~~
  17408. *
  17409. * **Operations**:\n
  17410. * ~~~
  17411. * Rd.W[1] = (Rs1.W[1] + Rs2.W[1]) u>> 1;
  17412. * Rd.W[0] = (Rs1.W[0] - Rs2.W[0]) u>> 1;
  17413. * ~~~
  17414. *
  17415. * \param [in] a unsigned long type of value stored in a
  17416. * \param [in] b unsigned long type of value stored in b
  17417. * \return value stored in unsigned long type
  17418. */
  17419. __STATIC_FORCEINLINE unsigned long __RV_URSTAS32(unsigned long a, unsigned long b)
  17420. {
  17421. unsigned long result;
  17422. __ASM volatile("urstas32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17423. return result;
  17424. }
  17425. /* ===== Inline Function End for 4.54. URSTAS32 ===== */
  17426. /* ===== Inline Function Start for 4.55. URSTSA32 ===== */
  17427. /**
  17428. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17429. * \brief URSTSA32 (SIMD 32-bit Unsigned Halving Straight Subtraction & Addition)
  17430. * \details
  17431. * **Type**: SIMD (RV64 Only)
  17432. *
  17433. * **Syntax**:\n
  17434. * ~~~
  17435. * URSTSA32 Rd, Rs1, Rs2
  17436. * ~~~
  17437. *
  17438. * **Purpose**:\n
  17439. * Do 32-bit unsigned integer element subtraction and 32-bit unsigned integer element
  17440. * addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements. The
  17441. * results are halved to avoid overflow or saturation.
  17442. *
  17443. * **Description**:\n
  17444. * This instruction subtracts the 32-bit unsigned integer element in [63:32] of Rs2 from
  17445. * the 32-bit unsigned integer element in [63:32] of Rs1, and adds the 32-bit unsigned integer element
  17446. * in [31:0] of Rs1 with the 32-bit unsigned integer element in [31:0] of Rs2. The two results are first
  17447. * logically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for
  17448. * addition.
  17449. *
  17450. * **Examples**:\n
  17451. * ~~~
  17452. * Please see `URADD32` and `URSUB32` instructions.
  17453. * ~~~
  17454. *
  17455. * **Operations**:\n
  17456. * ~~~
  17457. * Rd.W[1] = (Rs1.W[1] - Rs2.W[1]) u>> 1;
  17458. * Rd.W[0] = (Rs1.W[0] + Rs2.W[0]) u>> 1;
  17459. * ~~~
  17460. *
  17461. * \param [in] a unsigned long type of value stored in a
  17462. * \param [in] b unsigned long type of value stored in b
  17463. * \return value stored in unsigned long type
  17464. */
  17465. __STATIC_FORCEINLINE unsigned long __RV_URSTSA32(unsigned long a, unsigned long b)
  17466. {
  17467. unsigned long result;
  17468. __ASM volatile("urstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17469. return result;
  17470. }
  17471. /* ===== Inline Function End for 4.55. URSTSA32 ===== */
  17472. /* ===== Inline Function Start for 4.56. URSUB32 ===== */
  17473. /**
  17474. * \ingroup NMSIS_Core_DSP_Intrinsic_RV64_SIMD_32B_ADDSUB
  17475. * \brief URSUB32 (SIMD 32-bit Unsigned Halving Subtraction)
  17476. * \details
  17477. * **Type**: SIMD (RV64 Only)
  17478. *
  17479. * **Syntax**:\n
  17480. * ~~~
  17481. * URSUB32 Rd, Rs1, Rs2
  17482. * ~~~
  17483. *
  17484. * **Purpose**:\n
  17485. * Do 32-bit unsigned integer element subtractions simultaneously. The results are halved to
  17486. * avoid overflow or saturation.
  17487. *
  17488. * **Description**:\n
  17489. * This instruction subtracts the 32-bit unsigned integer elements in Rs2 from the 32-bit
  17490. * unsigned integer elements in Rs1. The results are first logically right-shifted by 1 bit and then
  17491. * written to Rd.
  17492. *
  17493. * **Examples**:\n
  17494. * ~~~
  17495. * * Ra = 0x7FFFFFFF, Rb = 0x80000000, Rt = 0xFFFFFFFF
  17496. * * Ra = 0x80000000, Rb = 0x7FFFFFFF, Rt = 0x00000000
  17497. * * Ra = 0x80000000, Rb = 0x40000000, Rt = 0x20000000
  17498. * ~~~
  17499. *
  17500. * **Operations**:\n
  17501. * ~~~
  17502. * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) u>> 1;
  17503. * for RV64: x=1...0
  17504. * ~~~
  17505. *
  17506. * \param [in] a unsigned long type of value stored in a
  17507. * \param [in] b unsigned long type of value stored in b
  17508. * \return value stored in unsigned long type
  17509. */
  17510. __STATIC_FORCEINLINE unsigned long __RV_URSUB32(unsigned long a, unsigned long b)
  17511. {
  17512. unsigned long result;
  17513. __ASM volatile("ursub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17514. return result;
  17515. }
  17516. /* ===== Inline Function End for 4.56. URSUB32 ===== */
  17517. #endif /* __RISCV_XLEN == 64 */
  17518. /**
  17519. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default Nuclei Default SIMD DSP Additional Instructions
  17520. * \ingroup NMSIS_Core_DSP_Intrinsic
  17521. * \brief (RV32 & RV64)Nuclei Customized DSP Instructions
  17522. * \details This is Nuclei customized DSP instructions for both RV32 and RV64
  17523. */
  17524. /* ===== Inline Function Start for EXPD80 ===== */
  17525. /**
  17526. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17527. * \brief EXPD80 (Expand and Copy Byte 0 to 32bit(when rv32) or 64bit(when rv64))
  17528. * \details
  17529. * **Type**: DSP
  17530. *
  17531. * **Syntax**:\n
  17532. * ~~~
  17533. * EXPD80 Rd, Rs1
  17534. * ~~~
  17535. *
  17536. * **Purpose**:\n
  17537. * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register.
  17538. * When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17539. *
  17540. * **Description**:\n
  17541. * Moves Rs1.B[0][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17542. *
  17543. * **Operations**:\n
  17544. * ~~~
  17545. * Rd.W[x][31:0] = CONCAT(Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0], Rs1.B[0][7:0]);
  17546. * for RV32: x=0; for RV64: x=1...0
  17547. * ~~~
  17548. *
  17549. * \param [in] a unsigned long type of value stored in a
  17550. * \return value stored in unsigned long type
  17551. */
  17552. __STATIC_FORCEINLINE unsigned long __RV_EXPD80(unsigned long a)
  17553. {
  17554. unsigned long result;
  17555. __ASM volatile("expd80 %0, %1" : "=r"(result) : "r"(a));
  17556. return result;
  17557. }
  17558. /* ===== Inline Function End for EXPD80 ===== */
  17559. /* ===== Inline Function Start for EXPD81 ===== */
  17560. /**
  17561. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17562. * \brief EXPD81 (Expand and Copy Byte 1 to 32bit(rv32) or 64bit(when rv64))
  17563. * \details
  17564. * **Type**: DSP
  17565. *
  17566. * **Syntax**:\n
  17567. * ~~~
  17568. * EXPD81 Rd, Rs1
  17569. * ~~~
  17570. *
  17571. * **Purpose**:\n
  17572. * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register. When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17573. *
  17574. * **Description**:\n
  17575. * Moves Rs1.B[1][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17576. *
  17577. * **Operations**:\n
  17578. * ~~~
  17579. * Rd.W[x][31:0] = CONCAT(Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0], Rs1.B[1][7:0]);
  17580. * for RV32: x=0; for RV64: x=1...0
  17581. * ~~~
  17582. *
  17583. * \param [in] a unsigned long type of value stored in a
  17584. * \return value stored in unsigned long type
  17585. */
  17586. __STATIC_FORCEINLINE unsigned long __RV_EXPD81(unsigned long a)
  17587. {
  17588. unsigned long result;
  17589. __ASM volatile("expd81 %0, %1" : "=r"(result) : "r"(a));
  17590. return result;
  17591. }
  17592. /* ===== Inline Function End for EXPD81 ===== */
  17593. /* ===== Inline Function Start for EXPD82 ===== */
  17594. /**
  17595. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17596. * \brief EXPD82 (Expand and Copy Byte 2 to 32bit(rv32) or 64bit(when rv64))
  17597. * \details
  17598. * **Type**: DSP
  17599. *
  17600. * **Syntax**:\n
  17601. * ~~~
  17602. * EXPD82 Rd, Rs1
  17603. * ~~~
  17604. *
  17605. * **Purpose**:\n
  17606. * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register. When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17607. *
  17608. * **Description**:\n
  17609. * Moves Rs1.B[2][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17610. *
  17611. * **Operations**:\n
  17612. * ~~~
  17613. * Rd.W[x][31:0] = CONCAT(Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0], Rs1.B[2][7:0]);
  17614. * for RV32: x=0; for RV64: x=1...0
  17615. * ~~~
  17616. *
  17617. * \param [in] a unsigned long type of value stored in a
  17618. * \return value stored in unsigned long type
  17619. */
  17620. __STATIC_FORCEINLINE unsigned long __RV_EXPD82(unsigned long a)
  17621. {
  17622. unsigned long result;
  17623. __ASM volatile("expd82 %0, %1" : "=r"(result) : "r"(a));
  17624. return result;
  17625. }
  17626. /* ===== Inline Function End for EXPD82 ===== */
  17627. /* ===== Inline Function Start for EXPD83 ===== */
  17628. /**
  17629. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17630. * \brief EXPD83 (Expand and Copy Byte 3 to 32bit(rv32) or 64bit(when rv64))
  17631. * \details
  17632. * **Type**: DSP
  17633. *
  17634. * **Syntax**:\n
  17635. * ~~~
  17636. * EXPD83 Rd, Rs1
  17637. * ~~~
  17638. *
  17639. * **Purpose**:\n
  17640. * When rv32, Copy 8-bit data from 32-bit chunks into 4 bytes in a register. When rv64, Copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17641. *
  17642. * **Description**:\n
  17643. * Moves Rs1.B[3][7:0] to Rd.[0][7:0], Rd.[1][7:0], Rd.[2][7:0], Rd.[3][7:0]
  17644. *
  17645. * **Operations**:\n
  17646. * ~~~
  17647. * Rd.W[x][31:0] = CONCAT(Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0], Rs1.B[3][7:0]);
  17648. * for RV32: x=0; for RV64: x=1...0
  17649. * ~~~
  17650. *
  17651. * \param [in] a unsigned long type of value stored in a
  17652. * \return value stored in unsigned long type
  17653. */
  17654. __STATIC_FORCEINLINE unsigned long __RV_EXPD83(unsigned long a)
  17655. {
  17656. unsigned long result;
  17657. __ASM volatile("expd83 %0, %1" : "=r"(result) : "r"(a));
  17658. return result;
  17659. }
  17660. /* ===== Inline Function End for EXPD83 ===== */
  17661. #if (__RISCV_XLEN == 64)
  17662. /* ===== Inline Function Start for EXPD84 ===== */
  17663. /**
  17664. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17665. * \brief EXPD84 (Expand and Copy Byte 4 to 64bit)
  17666. * \details
  17667. * **Type**: DSP
  17668. *
  17669. * **Syntax**:\n
  17670. * ~~~
  17671. * EXPD84 Rd, Rs1
  17672. * ~~~
  17673. *
  17674. * **Purpose**:\n
  17675. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17676. *
  17677. * **Description**:\n
  17678. * Moves Rs1.B[4][7:0] to Rd.[0][7:0], Rd.[1][7:0], ..., Rd.[7][7:0] (all 8 bytes of Rd)
  17679. *
  17680. * **Operations**:\n
  17681. * ~~~
  17682. * Rd.W[x][31:0] = CONCAT(Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0], Rs1.B[4][7:0]);
  17683. * for RV64: x=1...0
  17684. * ~~~
  17685. *
  17686. * \param [in] a unsigned long type of value stored in a
  17687. * \return value stored in unsigned long type
  17688. */
  17689. __STATIC_FORCEINLINE unsigned long __RV_EXPD84(unsigned long a)
  17690. {
  17691. unsigned long result;
  17692. __ASM volatile("expd84 %0, %1" : "=r"(result) : "r"(a));
  17693. return result;
  17694. }
  17695. /* ===== Inline Function End for EXPD84 ===== */
  17696. /* ===== Inline Function Start for EXPD85 ===== */
  17697. /**
  17698. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17699. * \brief EXPD85 (Expand and Copy Byte 5 to 64bit)
  17700. * \details
  17701. * **Type**: DSP
  17702. *
  17703. * **Syntax**:\n
  17704. * ~~~
  17705. * EXPD85 Rd, Rs1
  17706. * ~~~
  17707. *
  17708. * **Purpose**:\n
  17709. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17710. *
  17711. * **Description**:\n
  17712. * Moves Rs1.B[5][7:0] to Rd.[0][7:0], Rd.[1][7:0], ..., Rd.[7][7:0] (all 8 bytes of Rd)
  17713. *
  17714. * **Operations**:\n
  17715. * ~~~
  17716. * Rd.W[x][31:0] = CONCAT(Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0], Rs1.B[5][7:0]);
  17717. * for RV64: x=1...0
  17718. * ~~~
  17719. *
  17720. * \param [in] a unsigned long type of value stored in a
  17721. * \return value stored in unsigned long type
  17722. */
  17723. __STATIC_FORCEINLINE unsigned long __RV_EXPD85(unsigned long a)
  17724. {
  17725. unsigned long result;
  17726. __ASM volatile("expd85 %0, %1" : "=r"(result) : "r"(a));
  17727. return result;
  17728. }
  17729. /* ===== Inline Function End for EXPD85 ===== */
  17730. /* ===== Inline Function Start for EXPD86 ===== */
  17731. /**
  17732. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17733. * \brief EXPD86 (Expand and Copy Byte 6 to 64bit)
  17734. * \details
  17735. * **Type**: DSP
  17736. *
  17737. * **Syntax**:\n
  17738. * ~~~
  17739. * EXPD86 Rd, Rs1
  17740. * ~~~
  17741. *
  17742. * **Purpose**:\n
  17743. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17744. *
  17745. * **Description**:\n
  17746. * Moves Rs1.B[6][7:0] to Rd.[0][7:0], Rd.[1][7:0], ..., Rd.[7][7:0] (all 8 bytes of Rd)
  17747. *
  17748. * **Operations**:\n
  17749. * ~~~
  17750. * Rd.W[x][31:0] = CONCAT(Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0], Rs1.B[6][7:0]);
  17751. * for RV64: x=1...0
  17752. * ~~~
  17753. *
  17754. * \param [in] a unsigned long type of value stored in a
  17755. * \return value stored in unsigned long type
  17756. */
  17757. __STATIC_FORCEINLINE unsigned long __RV_EXPD86(unsigned long a)
  17758. {
  17759. unsigned long result;
  17760. __ASM volatile("expd86 %0, %1" : "=r"(result) : "r"(a));
  17761. return result;
  17762. }
  17763. /* ===== Inline Function End for EXPD86 ===== */
  17764. /* ===== Inline Function Start for EXPD87 ===== */
  17765. /**
  17766. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_Default
  17767. * \brief EXPD87 (Expand and Copy Byte 7 to 64bit)
  17768. * \details
  17769. * **Type**: DSP
  17770. *
  17771. * **Syntax**:\n
  17772. * ~~~
  17773. * EXPD87 Rd, Rs1
  17774. * ~~~
  17775. *
  17776. * **Purpose**:\n
  17777. * Only RV64, copy 8-bit data from 64-bit chunks into 8 bytes in a register.
  17778. *
  17779. * **Description**:\n
  17780. * Moves Rs1.B[7][7:0] to Rd.[0][7:0], Rd.[1][7:0], ..., Rd.[7][7:0] (all 8 bytes of Rd)
  17781. *
  17782. * **Operations**:\n
  17783. * ~~~
  17784. * Rd.W[x][31:0] = CONCAT(Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0], Rs1.B[7][7:0]);
  17785. * for RV64: x=1...0
  17786. * ~~~
  17787. *
  17788. * \param [in] a unsigned long type of value stored in a
  17789. * \return value stored in unsigned long type
  17790. */
  17791. __STATIC_FORCEINLINE unsigned long __RV_EXPD87(unsigned long a)
  17792. {
  17793. unsigned long result;
  17794. __ASM volatile("expd87 %0, %1" : "=r"(result) : "r"(a));
  17795. return result;
  17796. }
  17797. /* ===== Inline Function End for EXPD87 ===== */
  17798. #endif /* __RISCV_XLEN == 64 */
  17799. #if (__RISCV_XLEN == 32) || defined(__ONLY_FOR_DOXYGEN_DOCUMENT_GENERATION__)
  17800. /* XXXXX Nuclei Extended DSP Instructions for RV32 XXXXX */
  17801. /**
  17802. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1 Nuclei N1 SIMD DSP Additional Instructions
  17803. * \ingroup NMSIS_Core_DSP_Intrinsic
  17804. * \brief (RV32 only)Nuclei Customized N1 DSP Instructions
  17805. * \details This is Nuclei customized DSP N1 instructions only for RV32
  17806. */
  17807. /**
  17808. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2 Nuclei N2 SIMD DSP Additional Instructions
  17809. * \ingroup NMSIS_Core_DSP_Intrinsic
  17810. * \brief (RV32 only)Nuclei Customized N2 DSP Instructions
  17811. * \details This is Nuclei customized DSP N2 instructions only for RV32
  17812. */
  17813. /**
  17814. * \defgroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3 Nuclei N3 SIMD DSP Additional Instructions
  17815. * \ingroup NMSIS_Core_DSP_Intrinsic
  17816. * \brief (RV32 only)Nuclei Customized N3 DSP Instructions
  17817. * \details This is Nuclei customized DSP N3 instructions only for RV32
  17818. */
  17819. /* ===== Inline Function Start for DKHM8 ===== */
  17820. /**
  17821. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17822. * \brief DKHM8 (64-bit SIMD Signed Saturating Q7 Multiply)
  17823. * \details
  17824. * **Type**: SIMD
  17825. *
  17826. * **Syntax**:\n
  17827. * ~~~
  17828. * DKHM8 Rd, Rs1, Rs2
  17829. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  17830. * ~~~
  17831. *
  17832. * **Purpose**:\n
  17833. * Do Q7xQ7 element multiplications simultaneously. The Q14 results are then reduced to Q7
  17834. * numbers again.
  17835. *
  17836. * **Description**:\n
  17837. * For the `DKHM8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1
  17838. * with the top 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  17839. * content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2.
  17840. *
  17841. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  17842. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  17843. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  17844. *
  17845. * **Operations**:\n
  17846. * ~~~
  17847. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x+1]; // top
  17848. * op1b = Rs1.B[x]; op2b = Rs2.B[x]; // bottom
  17849. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  17850. * if (0x80 != aop | 0x80 != bop) {
  17851. * res = (aop s* bop) >> 7;
  17852. * } else {
  17853. * res= 0x7F;
  17854. * OV = 1;
  17855. * }
  17856. * }
  17857. * Rd.H[x/2] = concat(rest, resb);
  17858. * for RV32, x=0,2,4,6
  17859. * ~~~
  17860. *
  17861. * \param [in] a unsigned long long type of value stored in a
  17862. * \param [in] b unsigned long long type of value stored in b
  17863. * \return value stored in unsigned long long type
  17864. */
  17865. __STATIC_FORCEINLINE unsigned long long __RV_DKHM8(unsigned long long a, unsigned long long b)
  17866. {
  17867. unsigned long long result;
  17868. __ASM volatile("dkhm8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17869. return result;
  17870. }
  17871. /* ===== Inline Function End for DKHM8 ===== */
  17872. /* ===== Inline Function Start for DKHM16 ===== */
  17873. /**
  17874. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17875. * \brief DKHM16 (64-bit SIMD Signed Saturating Q15 Multiply)
  17876. * \details
  17877. * **Type**: SIMD
  17878. *
  17879. * **Syntax**:\n
  17880. * ~~~
  17881. * DKHM16 Rd, Rs1, Rs2
  17882. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  17883. * ~~~
  17884. *
  17885. * **Purpose**:\n
  17886. * Do Q15xQ15 element multiplications simultaneously. The Q30 results are then reduced to
  17887. * Q15 numbers again.
  17888. *
  17889. * **Description**:\n
  17890. * For the `DKHM16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in
  17891. * Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom
  17892. * 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in
  17893. * Rs2.
  17894. *
  17895. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  17896. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  17897. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  17898. *
  17899. * **Operations**:\n
  17900. * ~~~
  17901. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x+1]; // top
  17902. * op1b = Rs1.H[x]; op2b = Rs2.H[x]; // bottom
  17903. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  17904. * if (0x8000 != aop | 0x8000 != bop) {
  17905. * res = (aop s* bop) >> 15;
  17906. * } else {
  17907. * res= 0x7FFF;
  17908. * OV = 1;
  17909. * }
  17910. * }
  17911. * Rd.W[x/2] = concat(rest, resb);
  17912. * for RV32: x=0, 2
  17913. * ~~~
  17914. *
  17915. * \param [in] a unsigned long long type of value stored in a
  17916. * \param [in] b unsigned long long type of value stored in b
  17917. * \return value stored in unsigned long long type
  17918. */
  17919. __STATIC_FORCEINLINE unsigned long long __RV_DKHM16(unsigned long long a, unsigned long long b)
  17920. {
  17921. unsigned long long result;
  17922. __ASM volatile("dkhm16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  17923. return result;
  17924. }
  17925. /* ===== Inline Function End for DKHM16 ===== */
  17926. /* ===== Inline Function Start for DKABS8 ===== */
  17927. /**
  17928. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17929. * \brief DKABS8 (64-bit SIMD 8-bit Saturating Absolute)
  17930. * \details
  17931. * **Type**: SIMD
  17932. *
  17933. * **Syntax**:\n
  17934. * ~~~
  17935. * DKABS8 Rd, Rs1
  17936. * # Rd, Rs1 are all even/odd pair of registers
  17937. * ~~~
  17938. *
  17939. * **Purpose**:\n
  17940. * Get the absolute value of 8-bit signed integer elements simultaneously.
  17941. *
  17942. * **Description**:\n
  17943. * This instruction calculates the absolute value of 8-bit signed integer elements stored
  17944. * in Rs1 and writes the element results to Rd. If the input number is 0x80, this instruction generates
  17945. * 0x7f as the output and sets the OV bit to 1.
  17946. *
  17947. * **Operations**:\n
  17948. * ~~~
  17949. * src = Rs1.B[x];
  17950. * if (src == 0x80) {
  17951. * src = 0x7f;
  17952. * OV = 1;
  17953. * } else if (src[7] == 1) {
  17954. * src = -src;
  17955. * }
  17956. * Rd.B[x] = src;
  17957. * for RV32: x=7...0,
  17958. * ~~~
  17959. *
  17960. * \param [in] a unsigned long long type of value stored in a
  17961. * \return value stored in unsigned long long type
  17962. */
  17963. __STATIC_FORCEINLINE unsigned long long __RV_DKABS8(unsigned long long a)
  17964. {
  17965. unsigned long long result;
  17966. __ASM volatile("dkabs8 %0, %1" : "=r"(result) : "r"(a));
  17967. return result;
  17968. }
  17969. /* ===== Inline Function End for DKABS8 ===== */
  17970. /* ===== Inline Function Start for DKABS16 ===== */
  17971. /**
  17972. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  17973. * \brief DKABS16 (64-bit SIMD 16-bit Saturating Absolute)
  17974. * \details
  17975. * **Type**: SIMD
  17976. *
  17977. * **Syntax**:\n
  17978. * ~~~
  17979. * DKABS16 Rd, Rs1
  17980. * # Rd, Rs1 are all even/odd pair of registers
  17981. * ~~~
  17982. *
  17983. * **Purpose**:\n
  17984. * Get the absolute value of 16-bit signed integer elements simultaneously.
  17985. *
  17986. * **Description**:\n
  17987. * This instruction calculates the absolute value of 16-bit signed integer elements stored
  17988. * in Rs1 and writes the element results to Rd. If the input number is 0x8000, this instruction
  17989. * generates 0x7fff as the output and sets the OV bit to 1.
  17990. *
  17991. * **Operations**:\n
  17992. * ~~~
  17993. * src = Rs1.H[x];
  17994. * if (src == 0x8000) {
  17995. * src = 0x7fff;
  17996. * OV = 1;
  17997. * } else if (src[15] == 1) {
  17998. * src = -src;
  17999. * }
  18000. * Rd.H[x] = src;
  18001. * for RV32: x=3...0,
  18002. * ~~~
  18003. *
  18004. * \param [in] a unsigned long long type of value stored in a
  18005. * \return value stored in unsigned long long type
  18006. */
  18007. __STATIC_FORCEINLINE unsigned long long __RV_DKABS16(unsigned long long a)
  18008. {
  18009. unsigned long long result;
  18010. __ASM volatile("dkabs16 %0, %1" : "=r"(result) : "r"(a));
  18011. return result;
  18012. }
  18013. /* ===== Inline Function End for DKABS16 ===== */
  18014. /* ===== Inline Function Start for DKSLRA8 ===== */
  18015. /**
  18016. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18017. * \brief DKSLRA8 (64-bit SIMD 8-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  18018. * \details
  18019. * **Type**: SIMD
  18020. *
  18021. * **Syntax**:\n
  18022. * ~~~
  18023. * DKSLRA8 Rd, Rs1, Rs2
  18024. * # Rd, Rs1 are all even/odd pair of registers
  18025. * ~~~
  18026. *
  18027. * **Purpose**:\n
  18028. * Do 8-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  18029. * Q7 saturation for the left shift.
  18030. *
  18031. * **Description**:\n
  18032. * The 8-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  18033. * based on the value of Rs2[3:0]. Rs2[3:0] is in the signed range of [-2^3, 2^3-1]. A positive Rs2[3:0] means
  18034. * logical left shift and a negative Rs2[3:0] means arithmetic right shift. The shift amount is the
  18035. * absolute value of Rs2[3:0]. However, the behavior of `Rs2[3:0]==-2^3 (0x8)` is defined to be
  18036. * equivalent to the behavior of `Rs2[3:0]==-(2^3-1) (0x9)`.
  18037. * The left-shifted results are saturated to the 8-bit signed integer range of [-2^7, 2^7-1].
  18038. * If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:4] will not affect
  18039. * this instruction.
  18040. *
  18041. * **Operations**:\n
  18042. * ~~~
  18043. * if (Rs2[3:0] < 0) {
  18044. * sa = -Rs2[3:0];
  18045. * sa = (sa == 8)? 7 : sa;
  18046. * Rd.B[x] = SE8(Rs1.B[x][7:sa]);
  18047. * } else {
  18048. * sa = Rs2[2:0];
  18049. * res[(7+sa):0] = Rs1.B[x] <<(logic) sa;
  18050. * if (res > (2^7)-1) {
  18051. * res[7:0] = 0x7f; OV = 1;
  18052. * } else if (res < -2^7) {
  18053. * res[7:0] = 0x80; OV = 1;
  18054. * }
  18055. * Rd.B[x] = res[7:0];
  18056. * }
  18057. * for RV32: x=7...0,
  18058. * ~~~
  18059. *
  18060. * \param [in] a unsigned long long type of value stored in a
  18061. * \param [in] b int type of value stored in b
  18062. * \return value stored in unsigned long long type
  18063. */
  18064. __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA8(unsigned long long a, int b)
  18065. {
  18066. unsigned long long result;
  18067. __ASM volatile("dkslra8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18068. return result;
  18069. }
  18070. /* ===== Inline Function End for DKSLRA8 ===== */
  18071. /* ===== Inline Function Start for DKSLRA16 ===== */
  18072. /**
  18073. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18074. * \brief DKSLRA16 (64-bit SIMD 16-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  18075. * \details
  18076. * **Type**: SIMD
  18077. *
  18078. * **Syntax**:\n
  18079. * ~~~
  18080. * DKSLRA16 Rd, Rs1, Rs2
  18081. * # Rd, Rs1 are all even/odd pair of registers
  18082. * ~~~
  18083. *
  18084. * **Purpose**:\n
  18085. * Do 16-bit elements logical left (positive) or arithmetic right (negative) shift operation with
  18086. * Q15 saturation for the left shift.
  18087. *
  18088. * **Description**:\n
  18089. * The 16-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically
  18090. * based on the value of Rs2[4:0]. Rs2[4:0] is in the signed range of [-2^4, 2^4-1]. A positive Rs2[4:0] means
  18091. * logical left shift and a negative Rs2[4:0] means arithmetic right shift. The shift amount is the
  18092. * absolute value of Rs2[4:0]. However, the behavior of `Rs2[4:0]==-2^4 (0x10)` is defined to be
  18093. * equivalent to the behavior of `Rs2[4:0]==-(2^4-1) (0x11)`.
  18094. * The left-shifted results are saturated to the 16-bit signed integer range of [-2^15, 2^15-1].
  18095. * After the shift, saturation, or rounding, the final results are written to
  18096. * Rd. If any saturation happens, this instruction sets the OV flag. The value of Rs2[31:5] will not affect
  18097. * this instruction.
  18098. *
  18099. * **Operations**:\n
  18100. * ~~~
  18101. * if (Rs2[4:0] < 0) {
  18102. * sa = -Rs2[4:0];
  18103. * sa = (sa == 16)? 15 : sa;
  18104. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  18105. * } else {
  18106. * sa = Rs2[3:0];
  18107. * res[(15+sa):0] = Rs1.H[x] <<(logic) sa;
  18108. * if (res > (2^15)-1) {
  18109. * res[15:0] = 0x7fff; OV = 1;
  18110. * } else if (res < -2^15) {
  18111. * res[15:0] = 0x8000; OV = 1;
  18112. * }
  18113. * Rd.H[x] = res[15:0];
  18114. * }
  18115. * for RV32: x=3...0,
  18116. * ~~~
  18117. *
  18118. * \param [in] a unsigned long long type of value stored in a
  18119. * \param [in] b int type of value stored in b
  18120. * \return value stored in unsigned long long type
  18121. */
  18122. __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA16(unsigned long long a, int b)
  18123. {
  18124. unsigned long long result;
  18125. __ASM volatile("dkslra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18126. return result;
  18127. }
  18128. /* ===== Inline Function End for DKSLRA16 ===== */
  18129. /* ===== Inline Function Start for DKADD8 ===== */
  18130. /**
  18131. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18132. * \brief DKADD8 (64-bit SIMD 8-bit Signed Saturating Addition)
  18133. * \details
  18134. * **Type**: SIMD
  18135. *
  18136. * **Syntax**:\n
  18137. * ~~~
  18138. * DKADD8 Rd, Rs1, Rs2
  18139. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18140. * ~~~
  18141. *
  18142. * **Purpose**:\n
  18143. * Do 8-bit signed integer element saturating additions simultaneously.
  18144. *
  18145. * **Description**:\n
  18146. * This instruction adds the 8-bit signed integer elements in Rs1 with the 8-bit signed
  18147. * integer elements in Rs2. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1), they
  18148. * are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  18149. *
  18150. * **Operations**:\n
  18151. * ~~~
  18152. * res[x] = Rs1.B[x] + Rs2.B[x];
  18153. * if (res[x] > 127) {
  18154. * res[x] = 127;
  18155. * OV = 1;
  18156. * } else if (res[x] < -128) {
  18157. * res[x] = -128;
  18158. * OV = 1;
  18159. * }
  18160. * Rd.B[x] = res[x];
  18161. * for RV32: x=7...0,
  18162. * ~~~
  18163. *
  18164. * \param [in] a unsigned long long type of value stored in a
  18165. * \param [in] b unsigned long long type of value stored in b
  18166. * \return value stored in unsigned long long type
  18167. */
  18168. __STATIC_FORCEINLINE unsigned long long __RV_DKADD8(unsigned long long a, unsigned long long b)
  18169. {
  18170. unsigned long long result;
  18171. __ASM volatile("dkadd8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18172. return result;
  18173. }
  18174. /* ===== Inline Function End for DKADD8 ===== */
  18175. /* ===== Inline Function Start for DKADD16 ===== */
  18176. /**
  18177. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18178. * \brief DKADD16 (64-bit SIMD 16-bit Signed Saturating Addition)
  18179. * \details
  18180. * **Type**: SIMD
  18181. *
  18182. * **Syntax**:\n
  18183. * ~~~
  18184. * DKADD16 Rd, Rs1, Rs2
  18185. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18186. * ~~~
  18187. *
  18188. * **Purpose**:\n
  18189. * Do 16-bit signed integer element saturating additions simultaneously.
  18190. *
  18191. * **Description**:\n
  18192. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed
  18193. * integer elements in Rs2. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1),
  18194. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  18195. *
  18196. * **Operations**:\n
  18197. * ~~~
  18198. * res[x] = Rs1.H[x] + Rs2.H[x];
  18199. * if (res[x] > 32767) {
  18200. * res[x] = 32767;
  18201. * OV = 1;
  18202. * } else if (res[x] < -32768) {
  18203. * res[x] = -32768;
  18204. * OV = 1;
  18205. * }
  18206. * Rd.H[x] = res[x];
  18207. * for RV32: x=3...0,
  18208. * ~~~
  18209. *
  18210. * \param [in] a unsigned long long type of value stored in a
  18211. * \param [in] b unsigned long long type of value stored in b
  18212. * \return value stored in unsigned long long type
  18213. */
  18214. __STATIC_FORCEINLINE unsigned long long __RV_DKADD16(unsigned long long a, unsigned long long b)
  18215. {
  18216. unsigned long long result;
  18217. __ASM volatile("dkadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18218. return result;
  18219. }
  18220. /* ===== Inline Function End for DKADD16 ===== */
  18221. /* ===== Inline Function Start for DKSUB8 ===== */
  18222. /**
  18223. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18224. * \brief DKSUB8 (64-bit SIMD 8-bit Signed Saturating Subtraction)
  18225. * \details
  18226. * **Type**: SIMD
  18227. *
  18228. * **Syntax**:\n
  18229. * ~~~
  18230. * DKSUB8 Rd, Rs1, Rs2
  18231. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18232. * ~~~
  18233. *
  18234. * **Purpose**:\n
  18235. * Do 8-bit signed elements saturating subtractions simultaneously.
  18236. *
  18237. * **Description**:\n
  18238. * This instruction subtracts the 8-bit signed integer elements in Rs2 from the 8-bit
  18239. * signed integer elements in Rs1. If any of the results are beyond the Q7 number range (-2^7 <= Q7 <= 2^7-1),
  18240. * they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.
  18241. *
  18242. * **Operations**:\n
  18243. * ~~~
  18244. * res[x] = Rs1.B[x] - Rs2.B[x];
  18245. * if (res[x] > (2^7)-1) {
  18246. * res[x] = (2^7)-1;
  18247. * OV = 1;
  18248. * } else if (res[x] < -2^7) {
  18249. * res[x] = -2^7;
  18250. * OV = 1;
  18251. * }
  18252. * Rd.B[x] = res[x];
  18253. * for RV32: x=7...0,
  18254. * ~~~
  18255. *
  18256. * \param [in] a unsigned long long type of value stored in a
  18257. * \param [in] b unsigned long long type of value stored in b
  18258. * \return value stored in unsigned long long type
  18259. */
  18260. __STATIC_FORCEINLINE unsigned long long __RV_DKSUB8(unsigned long long a, unsigned long long b)
  18261. {
  18262. unsigned long long result;
  18263. __ASM volatile("dksub8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18264. return result;
  18265. }
  18266. /* ===== Inline Function End for DKSUB8 ===== */
  18267. /* ===== Inline Function Start for DKSUB16 ===== */
  18268. /**
  18269. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N1
  18270. * \brief DKSUB16 (64-bit SIMD 16-bit Signed Saturating Subtraction)
  18271. * \details
  18272. * **Type**: SIMD
  18273. *
  18274. * **Syntax**:\n
  18275. * ~~~
  18276. * DKSUB16 Rd, Rs1, Rs2
  18277. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18278. * ~~~
  18279. *
  18280. * **Purpose**:\n
  18281. * Do 16-bit signed integer elements saturating subtractions simultaneously.
  18282. *
  18283. * **Description**:\n
  18284. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit
  18285. * signed integer elements in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <=
  18286. * 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to
  18287. * Rd.
  18288. *
  18289. * **Operations**:\n
  18290. * ~~~
  18291. * res[x] = Rs1.H[x] - Rs2.H[x];
  18292. * if (res[x] > (2^15)-1) {
  18293. * res[x] = (2^15)-1;
  18294. * OV = 1;
  18295. * } else if (res[x] < -2^15) {
  18296. * res[x] = -2^15;
  18297. * OV = 1;
  18298. * }
  18299. * Rd.H[x] = res[x];
  18300. * for RV32: x=3...0,
  18301. * ~~~
  18302. *
  18303. * \param [in] a unsigned long long type of value stored in a
  18304. * \param [in] b unsigned long long type of value stored in b
  18305. * \return value stored in unsigned long long type
  18306. */
  18307. __STATIC_FORCEINLINE unsigned long long __RV_DKSUB16(unsigned long long a, unsigned long long b)
  18308. {
  18309. unsigned long long result;
  18310. __ASM volatile("dksub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18311. return result;
  18312. }
  18313. /* ===== Inline Function End for DKSUB16 ===== */
  18314. /* ===== Inline Function Start for DKHMX8 ===== */
  18315. /**
  18316. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18317. * \brief DKHMX8 (64-bit SIMD Signed Crossed Saturating Q7 Multiply)
  18318. * \details
  18319. * **Type**: SIMD
  18320. *
  18321. * **Syntax**:\n
  18322. * ~~~
  18323. * DKHMX8 Rd, Rs1, Rs2
  18324. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18325. * ~~~
  18326. *
  18327. * **Purpose**:\n
  18328. * Do Q7xQ7 element crossed multiplications simultaneously. The Q14 results are then reduced to Q7 numbers again.
  18329. *
  18330. * **Description**:\n
  18331. * For the `DKHMX8` instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the
  18332. * bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7
  18333. * content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.
  18334. *
  18335. * The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then
  18336. * written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen.
  18337. * The result will be saturated to 0x7F and the overflow flag OV will be set.
  18338. *
  18339. * **Operations**:\n
  18340. * ~~~
  18341. * op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // top
  18342. * op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // bottom
  18343. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18344. * if (0x80 != aop | 0x80 != bop) {
  18345. * res = (aop s* bop) >> 7;
  18346. * } else {
  18347. * res= 0x7F;
  18348. * OV = 1;
  18349. * }
  18350. * }
  18351. * Rd.H[x/2] = concat(rest, resb);
  18352. * for RV32, x=0,2,4,6
  18353. * ~~~
  18354. *
  18355. * \param [in] a unsigned long long type of value stored in a
  18356. * \param [in] b unsigned long long type of value stored in b
  18357. * \return value stored in unsigned long long type
  18358. */
  18359. __STATIC_FORCEINLINE unsigned long long __RV_DKHMX8(unsigned long long a, unsigned long long b)
  18360. {
  18361. unsigned long long result;
  18362. __ASM volatile("dkhmx8 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18363. return result;
  18364. }
  18365. /* ===== Inline Function End for DKHMX8 ===== */
  18366. /* ===== Inline Function Start for DKHMX16 ===== */
  18367. /**
  18368. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18369. * \brief DKHMX16 (64-bit SIMD Signed Crossed Saturating Q15 Multiply)
  18370. * \details
  18371. * **Type**: SIMD
  18372. *
  18373. * **Syntax**:\n
  18374. * ~~~
  18375. * DKHMX16 Rd, Rs1, Rs2
  18376. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18377. * ~~~
  18378. *
  18379. * **Purpose**:\n
  18380. * Do Q15xQ15 element crossed multiplications simultaneously. The Q30 results are then reduced to Q15 numbers again.
  18381. *
  18382. * **Description**:\n
  18383. * For the `DKHMX16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the
  18384. * bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15
  18385. * content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.
  18386. *
  18387. * The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are
  18388. * then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will
  18389. * happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.
  18390. *
  18391. * **Operations**:\n
  18392. * ~~~
  18393. * op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // top
  18394. * op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // bottom
  18395. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18396. * if (0x8000 != aop | 0x8000 != bop) {
  18397. * res = (aop s* bop) >> 15;
  18398. * } else {
  18399. * res= 0x7FFF;
  18400. * OV = 1;
  18401. * }
  18402. * }
  18403. * Rd.W[x/2] = concat(rest, resb);
  18404. * for RV32, x=0,2
  18405. * ~~~
  18406. *
  18407. * \param [in] a unsigned long long type of value stored in a
  18408. * \param [in] b unsigned long long type of value stored in b
  18409. * \return value stored in unsigned long long type
  18410. */
  18411. __STATIC_FORCEINLINE unsigned long long __RV_DKHMX16(unsigned long long a, unsigned long long b)
  18412. {
  18413. unsigned long long result;
  18414. __ASM volatile("dkhmx16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18415. return result;
  18416. }
  18417. /* ===== Inline Function End for DKHMX16 ===== */
  18418. /* ===== Inline Function Start for DSMMUL ===== */
  18419. /**
  18420. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18421. * \brief DSMMUL (64-bit MSW 32x32 Signed Multiply)
  18422. * \details
  18423. * **Type**: SIMD
  18424. *
  18425. * **Syntax**:\n
  18426. * ~~~
  18427. * DSMMUL Rd, Rs1, Rs2
  18428. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18429. * ~~~
  18430. *
  18431. * **Purpose**:\n
  18432. * Do MSW 32x32 element signed multiplications simultaneously. The results are written into Rd.
  18433. *
  18434. * **Description**:\n
  18435. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  18436. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  18437. * elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction rounds up
  18438. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  18439. *
  18440. * **Operations**:\n
  18441. * ~~~
  18442. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18443. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18444. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18445. * res = (aop s* bop)[63:32];
  18446. * }
  18447. * Rd = concat(rest, resb);
  18448. * x=0
  18449. * ~~~
  18450. *
  18451. * \param [in] a unsigned long long type of value stored in a
  18452. * \param [in] b unsigned long long type of value stored in b
  18453. * \return value stored in unsigned long long type
  18454. */
  18455. __STATIC_FORCEINLINE unsigned long long __RV_DSMMUL(unsigned long long a, unsigned long long b)
  18456. {
  18457. unsigned long long result;
  18458. __ASM volatile("dsmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18459. return result;
  18460. }
  18461. /* ===== Inline Function End for DSMMUL ===== */
  18462. /* ===== Inline Function Start for DSMMUL.u ===== */
  18463. /**
  18464. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18465. * \brief DSMMUL.u (64-bit MSW 32x32 Unsigned Multiply)
  18466. * \details
  18467. * **Type**: SIMD
  18468. *
  18469. * **Syntax**:\n
  18470. * ~~~
  18471. * DSMMUL.u Rd, Rs1, Rs2
  18472. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18473. * ~~~
  18474. *
  18475. * **Purpose**:\n
  18476. * Do MSW 32x32 element unsigned multiplications simultaneously. The results are written into Rd.
  18477. *
  18478. * **Description**:\n
  18479. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the
  18480. * most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit
  18481. * elements of Rs1 and Rs2 are treated as unsigned integers. The .u form of the instruction rounds up
  18482. * the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.
  18483. *
  18484. * **Operations**:\n
  18485. * ~~~
  18486. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18487. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18488. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18489. * res = RUND(aop u* bop)[63:32];
  18490. * }
  18491. * Rd = concat(rest, resb);
  18492. * x=0
  18493. * ~~~
  18494. *
  18495. * \param [in] a unsigned long long type of value stored in a
  18496. * \param [in] b unsigned long long type of value stored in b
  18497. * \return value stored in unsigned long long type
  18498. */
  18499. __STATIC_FORCEINLINE unsigned long long __RV_DSMMUL_U(unsigned long long a, unsigned long long b)
  18500. {
  18501. unsigned long long result;
  18502. __ASM volatile("dsmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18503. return result;
  18504. }
  18505. /* ===== Inline Function End for DSMMUL.u ===== */
  18506. /* ===== Inline Function Start for DKWMMUL ===== */
  18507. /**
  18508. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18509. * \brief DKWMMUL (64-bit MSW 32x32 Signed Multiply & Double)
  18510. * \details
  18511. * **Type**: SIMD
  18512. *
  18513. * **Syntax**:\n
  18514. * ~~~
  18515. * DKWMMUL Rd, Rs1, Rs2
  18516. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18517. * ~~~
  18518. *
  18519. * **Purpose**:\n
  18520. * Do MSW 32x32 element signed multiplications simultaneously and double. The results are written into Rd.
  18521. *
  18522. * **Description**:\n
  18523. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  18524. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  18525. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  18526. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
  18527. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  18528. * 30 before the shift and saturation operations.
  18529. *
  18530. * **Operations**:\n
  18531. * ~~~
  18532. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18533. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18534. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18535. * res = sat.q31((aop s* bop) << 1)[63:32];
  18536. * }
  18537. * Rd = concat(rest, resb);
  18538. * x=0
  18539. * ~~~
  18540. *
  18541. * \param [in] a unsigned long long type of value stored in a
  18542. * \param [in] b unsigned long long type of value stored in b
  18543. * \return value stored in unsigned long long type
  18544. */
  18545. __STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL(unsigned long long a, unsigned long long b)
  18546. {
  18547. unsigned long long result;
  18548. __ASM volatile("dkwmmul %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18549. return result;
  18550. }
  18551. /* ===== Inline Function End for DKWMMUL ===== */
  18552. /* ===== Inline Function Start for DKWMMUL.u ===== */
  18553. /**
  18554. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18555. * \brief DKWMMUL.u (64-bit MSW 32x32 Unsigned Multiply & Double)
  18556. * \details
  18557. * **Type**: SIMD
  18558. *
  18559. * **Syntax**:\n
  18560. * ~~~
  18561. * DKWMMUL.u Rd, Rs1, Rs2
  18562. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18563. * ~~~
  18564. *
  18565. * **Purpose**:\n
  18566. * Do MSW 32x32 element unsigned multiplications simultaneously and double. The results are written into Rd.
  18567. *
  18568. * **Description**:\n
  18569. * This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts
  18570. * the multiplication results one bit to the left and takes the most significant 32-bit results. If the
  18571. * shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element
  18572. * result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u
  18573. * form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit
  18574. * 30 before the shift and saturation operations.
  18575. *
  18576. * **Operations**:\n
  18577. * ~~~
  18578. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  18579. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  18580. * for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  18581. * res = sat.q31(RUND(aop u* bop) << 1)[63:32];
  18582. * }
  18583. * Rd = concat(rest, resb);
  18584. * x=0
  18585. * ~~~
  18586. *
  18587. * \param [in] a unsigned long long type of value stored in a
  18588. * \param [in] b unsigned long long type of value stored in b
  18589. * \return value stored in unsigned long long type
  18590. */
  18591. __STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL_U(unsigned long long a, unsigned long long b)
  18592. {
  18593. unsigned long long result;
  18594. __ASM volatile("dkwmmul.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18595. return result;
  18596. }
  18597. /* ===== Inline Function End for DKWMMUL.u ===== */
  18598. /* ===== Inline Function Start for DKABS32 ===== */
  18599. /**
  18600. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18601. * \brief DKABS32 (64-bit SIMD 32-bit Saturating Absolute)
  18602. * \details
  18603. * **Type**: SIMD
  18604. *
  18605. * **Syntax**:\n
  18606. * ~~~
  18607. * DKABS32 Rd, Rs1
  18608. * # Rd, Rs1 are all even/odd pair of registers
  18609. * ~~~
  18610. *
  18611. * **Purpose**:\n
  18612. * Get the absolute value of 32-bit signed integer elements simultaneously.
  18613. *
  18614. * **Description**:\n
  18615. * This instruction calculates the absolute value of 32-bit signed integer elements stored in Rs1 and writes the element
  18616. * results to Rd. If the input number is 0x8000_0000, this instruction generates 0x7fff_ffff as the output and sets the OV
  18617. * bit to 1.
  18618. *
  18619. * **Operations**:\n
  18620. * ~~~
  18621. * src = Rs1.W[x];
  18622. * if (src == 0x8000_0000) {
  18623. * src = 0x7fff_ffff;
  18624. * OV = 1;
  18625. * } else if (src[31] == 1) {
  18626. * src = -src;
  18627. * }
  18628. * Rd.W[x] = src;
  18629. * x=1...0
  18630. * ~~~
  18631. *
  18632. * \param [in] a unsigned long long type of value stored in a
  18633. * \return value stored in unsigned long long type
  18634. */
  18635. __STATIC_FORCEINLINE unsigned long long __RV_DKABS32(unsigned long long a)
  18636. {
  18637. unsigned long long result;
  18638. __ASM volatile("dkabs32 %0, %1" : "=r"(result) : "r"(a));
  18639. return result;
  18640. }
  18641. /* ===== Inline Function End for DKABS32 ===== */
  18642. /* ===== Inline Function Start for DKSLRA32 ===== */
  18643. /**
  18644. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18645. * \brief DKSLRA32 (64-bit SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)
  18646. * \details
  18647. * **Type**: SIMD
  18648. *
  18649. * **Syntax**:\n
  18650. * ~~~
  18651. * DKSLRA32 Rd, Rs1, Rs2
  18652. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18653. * ~~~
  18654. *
  18655. * **Purpose**:\n
  18656. * Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with Q31 saturation for the left shift.
  18657. *
  18658. * **Description**:\n
  18659. * The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically based on the value of Rs2[5:0].
  18660. * Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means logical left shift and a negative Rs2[5:0]
  18661. * means arithmetic right shift. The shift amount is the absolute value of Rs2[5:0]. However, the behavior of
  18662. * Rs2[5:0]==-2^5 (0x20) is defined to be equivalent to the behavior of Rs2[5:0]==-(2^5-1) (0x21).
  18663. *
  18664. * **Operations**:\n
  18665. * ~~~
  18666. * if (Rs2[5:0] < 0) {
  18667. * sa = -Rs2[5:0];
  18668. * sa = (sa == 32)? 31 : sa;
  18669. * Rd.W[x] = SE32(Rs1.W[x][31:sa]);
  18670. * } else {
  18671. * sa = Rs2[4:0];
  18672. * res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
  18673. * if (res > (2^31)-1) {
  18674. * res[31:0] = 0x7fff_ffff; OV = 1;
  18675. * } else if (res < -2^31) {
  18676. * res[31:0] = 0x8000_0000; OV = 1;
  18677. * }
  18678. * Rd.W[x] = res[31:0];
  18679. * }
  18680. * x=1...0
  18681. * ~~~
  18682. *
  18683. * \param [in] a unsigned long long type of value stored in a
  18684. * \param [in] b int type of value stored in b
  18685. * \return value stored in unsigned long long type
  18686. */
  18687. __STATIC_FORCEINLINE unsigned long long __RV_DKSLRA32(unsigned long long a, int b)
  18688. {
  18689. unsigned long long result;
  18690. __ASM volatile("dkslra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18691. return result;
  18692. }
  18693. /* ===== Inline Function End for DKSLRA32 ===== */
  18694. /* ===== Inline Function Start for DKADD32 ===== */
  18695. /**
  18696. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18697. * \brief DKADD32 (64-bit SIMD 32-bit Signed Saturating Addition)
  18698. * \details
  18699. * **Type**: SIMD
  18700. *
  18701. * **Syntax**:\n
  18702. * ~~~
  18703. * DKADD32 Rd, Rs1, Rs2
  18704. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18705. * ~~~
  18706. *
  18707. * **Purpose**:\n
  18708. * Do 32-bit signed integer element saturating additions simultaneously.
  18709. *
  18710. * **Description**:\n
  18711. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. If any
  18712. * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV
  18713. * bit is set to 1. The saturated results are written to Rd.
  18714. *
  18715. * **Operations**:\n
  18716. * ~~~
  18717. * res[x] = Rs1.W[x] + Rs2.W[x];
  18718. * if (res[x] > 0x7fff_ffff) {
  18719. * res[x] = 0x7fff_ffff;
  18720. * OV = 1;
  18721. * } else if (res[x] < 0x8000_0000) {
  18722. * res[x] = 0x8000_0000;
  18723. * OV = 1;
  18724. * }
  18725. * Rd.W[x] = res[x];
  18726. * x=1...0
  18727. * ~~~
  18728. *
  18729. * \param [in] a unsigned long long type of value stored in a
  18730. * \param [in] b unsigned long long type of value stored in b
  18731. * \return value stored in unsigned long long type
  18732. */
  18733. __STATIC_FORCEINLINE unsigned long long __RV_DKADD32(unsigned long long a, unsigned long long b)
  18734. {
  18735. unsigned long long result;
  18736. __ASM volatile("dkadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18737. return result;
  18738. }
  18739. /* ===== Inline Function End for DKADD32 ===== */
  18740. /* ===== Inline Function Start for DKSUB32 ===== */
  18741. /**
  18742. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18743. * \brief DKSUB32 (64-bit SIMD 32-bit Signed Saturating Subtraction)
  18744. * \details
  18745. * **Type**: SIMD
  18746. *
  18747. * **Syntax**:\n
  18748. * ~~~
  18749. * DKSUB32 Rd, Rs1, Rs2
  18750. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18751. * ~~~
  18752. *
  18753. * **Purpose**:\n
  18754. * Do 32-bit signed integer element saturating subtractions simultaneously.
  18755. *
  18756. * **Description**:\n
  18757. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. If
  18758. * any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the
  18759. * OV bit is set to 1. The saturated results are written to Rd.
  18760. *
  18761. * **Operations**:\n
  18762. * ~~~
  18763. * res[x] = Rs1.W[x] - Rs2.W[x];
  18764. * if (res[x] > (2^31)-1) {
  18765. * res[x] = (2^31)-1;
  18766. * OV = 1;
  18767. * } else if (res[x] < -2^31) {
  18768. * res[x] = -2^31;
  18769. * OV = 1;
  18770. * }
  18771. * Rd.W[x] = res[x];
  18772. * x=1...0
  18773. * ~~~
  18774. *
  18775. * \param [in] a unsigned long long type of value stored in a
  18776. * \param [in] b unsigned long long type of value stored in b
  18777. * \return value stored in unsigned long long type
  18778. */
  18779. __STATIC_FORCEINLINE unsigned long long __RV_DKSUB32(unsigned long long a, unsigned long long b)
  18780. {
  18781. unsigned long long result;
  18782. __ASM volatile("dksub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18783. return result;
  18784. }
  18785. /* ===== Inline Function End for DKSUB32 ===== */
  18786. /* ===== Inline Function Start for DRADD16 ===== */
  18787. /**
  18788. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18789. * \brief DRADD16 (64-bit SIMD 16-bit Halving Signed Addition)
  18790. * \details
  18791. * **Type**: SIMD
  18792. *
  18793. * **Syntax**:\n
  18794. * ~~~
  18795. * DRADD16 Rd, Rs1, Rs2
  18796. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18797. * ~~~
  18798. *
  18799. * **Purpose**:\n
  18800. * Do 16-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
  18801. *
  18802. * **Description**:\n
  18803. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results
  18804. * are first arithmetically right-shifted by 1 bit and then written to Rd.
  18805. *
  18806. * **Operations**:\n
  18807. * ~~~
  18808. * Rd.H[x] = [(Rs1.H[x]) + (Rs2.H[x])] s>> 1;
  18809. * x=3...0
  18810. * ~~~
  18811. *
  18812. * \param [in] a unsigned long long type of value stored in a
  18813. * \param [in] b unsigned long long type of value stored in b
  18814. * \return value stored in unsigned long long type
  18815. */
  18816. __STATIC_FORCEINLINE unsigned long long __RV_DRADD16(unsigned long long a, unsigned long long b)
  18817. {
  18818. unsigned long long result;
  18819. __ASM volatile("dradd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18820. return result;
  18821. }
  18822. /* ===== Inline Function End for DRADD16 ===== */
  18823. /* ===== Inline Function Start for DSUB16 ===== */
  18824. /**
  18825. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18826. * \brief DSUB16 (64-bit SIMD 16-bit Halving Signed Subtraction)
  18827. * \details
  18828. * **Type**: SIMD
  18829. *
  18830. * **Syntax**:\n
  18831. * ~~~
  18832. * DSUB16 Rd, Rs1, Rs2
  18833. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18834. * ~~~
  18835. *
  18836. * **Purpose**:\n
  18837. * Do 16-bit integer element subtractions simultaneously.
  18838. *
  18839. * **Description**:\n
  18840. * This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results
  18841. * are first arithmetically right-shifted by 1 bit and then written to Rd.
  18842. *
  18843. * **Operations**:\n
  18844. * ~~~
  18845. * Rd.H[x] = [(Rs1.H[x]) - (Rs2.H[x])] ;
  18846. * x=3...0
  18847. * ~~~
  18848. *
  18849. * \param [in] a unsigned long long type of value stored in a
  18850. * \param [in] b unsigned long long type of value stored in b
  18851. * \return value stored in unsigned long long type
  18852. */
  18853. __STATIC_FORCEINLINE unsigned long long __RV_DSUB16(unsigned long long a, unsigned long long b)
  18854. {
  18855. unsigned long long result;
  18856. __ASM volatile("dsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18857. return result;
  18858. }
  18859. /* ===== Inline Function End for DSUB16 ===== */
  18860. /* ===== Inline Function Start for DRADD32 ===== */
  18861. /**
  18862. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18863. * \brief DRADD32 (64-bit SIMD 32-bit Halving Signed Addition)
  18864. * \details
  18865. * **Type**: SIMD
  18866. *
  18867. * **Syntax**:\n
  18868. * ~~~
  18869. * DRADD32 Rd, Rs1, Rs2
  18870. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18871. * ~~~
  18872. *
  18873. * **Purpose**:\n
  18874. * Do 32-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.
  18875. *
  18876. * **Description**:\n
  18877. * This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. The results
  18878. * are first arithmetically right-shifted by 1 bit and then written to Rd.
  18879. *
  18880. * **Operations**:\n
  18881. * ~~~
  18882. * Rd.W[x] = [(Rs1.W[x]) + (Rs2.W[x])] s>> 1;
  18883. * x=1...0
  18884. * ~~~
  18885. *
  18886. * \param [in] a unsigned long long type of value stored in a
  18887. * \param [in] b unsigned long long type of value stored in b
  18888. * \return value stored in unsigned long long type
  18889. */
  18890. __STATIC_FORCEINLINE unsigned long long __RV_DRADD32(unsigned long long a, unsigned long long b)
  18891. {
  18892. unsigned long long result;
  18893. __ASM volatile("dradd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18894. return result;
  18895. }
  18896. /* ===== Inline Function End for DRADD32 ===== */
  18897. /* ===== Inline Function Start for DSUB32 ===== */
  18898. /**
  18899. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18900. * \brief DSUB32 (64-bit SIMD 32-bit Halving Signed Subtraction)
  18901. * \details
  18902. * **Type**: SIMD
  18903. *
  18904. * **Syntax**:\n
  18905. * ~~~
  18906. * DSUB32 Rd, Rs1, Rs2
  18907. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18908. * ~~~
  18909. *
  18910. * **Purpose**:\n
  18911. * Do 32-bit integer element subtractions simultaneously.
  18912. *
  18913. * **Description**:\n
  18914. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1 . The
  18915. * results are written to Rd.
  18916. *
  18917. * **Operations**:\n
  18918. * ~~~
  18919. * Rd.W[x] = [(Rs1.E[x]) - (Rs2.E[x])] ;
  18920. * x=1...0
  18921. * ~~~
  18922. *
  18923. * \param [in] a unsigned long long type of value stored in a
  18924. * \param [in] b unsigned long long type of value stored in b
  18925. * \return value stored in unsigned long long type
  18926. */
  18927. __STATIC_FORCEINLINE unsigned long long __RV_DSUB32(unsigned long long a, unsigned long long b)
  18928. {
  18929. unsigned long long result;
  18930. __ASM volatile("dsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18931. return result;
  18932. }
  18933. /* ===== Inline Function End for DSUB32 ===== */
  18934. /* ===== Inline Function Start for DMSR16 ===== */
  18935. /**
  18936. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18937. * \brief DMSR16 (Signed Multiply Halfs with Right Shift 16-bit and Cross Multiply Halfs with Right Shift 16-bit)
  18938. * \details
  18939. * **Type**: SIMD
  18940. *
  18941. * **Syntax**:\n
  18942. * ~~~
  18943. * DMSR16 Rd, Rs1, Rs2
  18944. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18945. * ~~~
  18946. *
  18947. * **Purpose**:\n
  18948. * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers; and each multiplications performs a right shift operation.
  18949. *
  18950. * **Description**:\n
  18951. * For the `DMSR16` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
  18952. * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
  18953. * of 32-bit chunks in Rs2.
  18954. * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom16-bit Q15 content of 32-bit
  18955. * chunks in Rs2 and multiply the bottom16-bit Q15 content of 32-bit chunks in Rs1 with the top16-bit Q15 content of 32-bit
  18956. * chunks in Rs2. The Q31 results are then right-shifted 16-bits and clipped to Q15 values. The Q15 results are then written
  18957. * into Rd.
  18958. *
  18959. * **Operations**:\n
  18960. * ~~~
  18961. * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 16
  18962. * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 16
  18963. * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 16
  18964. * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 16
  18965. * ~~~
  18966. *
  18967. * \param [in] a unsigned long type of value stored in a
  18968. * \param [in] b unsigned long type of value stored in b
  18969. * \return value stored in unsigned long long type
  18970. */
  18971. __STATIC_FORCEINLINE unsigned long long __RV_DMSR16(unsigned long a, unsigned long b)
  18972. {
  18973. unsigned long long result;
  18974. __ASM volatile("dmsr16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  18975. return result;
  18976. }
  18977. /* ===== Inline Function End for DMSR16 ===== */
  18978. /* ===== Inline Function Start for DMSR17 ===== */
  18979. /**
  18980. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  18981. * \brief DMSR17 (Signed Multiply Halfs with Right Shift 17-bit and Cross Multiply Halfs with Right Shift 17-bit)
  18982. * \details
  18983. * **Type**: SIMD
  18984. *
  18985. * **Syntax**:\n
  18986. * ~~~
  18987. * DMSR17 Rd, Rs1, Rs2
  18988. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  18989. * ~~~
  18990. *
  18991. * **Purpose**:\n
  18992. * Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers;
  18993. * and each multiplications performs a right shift operation.
  18994. *
  18995. * **Description**:\n
  18996. * For the `DMSR17` instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content
  18997. * of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content
  18998. * of 32-bit chunks in Rs2.
  18999. * At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit
  19000. * chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit
  19001. * chunks in Rs2. The Q31 results are then right-shifted 17-bits and clipped to Q15 values. The Q15 results are then written
  19002. * into Rd.
  19003. *
  19004. * **Operations**:\n
  19005. * ~~~
  19006. * Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 17
  19007. * Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 17
  19008. * Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 17
  19009. * Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 17
  19010. * ~~~
  19011. *
  19012. * \param [in] a unsigned long type of value stored in a
  19013. * \param [in] b unsigned long type of value stored in b
  19014. * \return value stored in unsigned long long type
  19015. */
  19016. __STATIC_FORCEINLINE unsigned long long __RV_DMSR17(unsigned long a, unsigned long b)
  19017. {
  19018. unsigned long long result;
  19019. __ASM volatile("dmsr17 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19020. return result;
  19021. }
  19022. /* ===== Inline Function End for DMSR17 ===== */
  19023. /* ===== Inline Function Start for DMSR33 ===== */
  19024. /**
  19025. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19026. * \brief DMSR33 (Signed Multiply with Right Shift 33-bit and Cross Multiply with Right Shift 33-bit)
  19027. * \details
  19028. * **Type**: SIMD
  19029. *
  19030. * **Syntax**:\n
  19031. * ~~~
  19032. * DMSR33 Rd, Rs1, Rs2
  19033. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19034. * ~~~
  19035. *
  19036. * **Purpose**:\n
  19037. * Do two signed 32-bit multiplications from the 32-bit elements of two registers, and each multiplications performs a right
  19038. * shift operation.
  19039. *
  19040. * **Description**:\n
  19041. * For the `DMSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 content
  19042. * of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64bit chunks in Rs1 with the bottom
  19043. * 32-bit Q31 content of 64-bit.
  19044. * The Q64 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
  19045. *
  19046. * **Operations**:\n
  19047. * ~~~
  19048. * Rd.W[0] = (Rs1.W[0] s* Rs2.W[0]) s>> 33
  19049. * Rd.W[1] = (Rs1.W[1] s* Rs2.W[1]) s>> 33
  19050. * ~~~
  19051. *
  19052. * \param [in] a unsigned long long type of value stored in a
  19053. * \param [in] b unsigned long long type of value stored in b
  19054. * \return value stored in unsigned long long type
  19055. */
  19056. __STATIC_FORCEINLINE unsigned long long __RV_DMSR33(unsigned long long a, unsigned long long b)
  19057. {
  19058. unsigned long long result;
  19059. __ASM volatile("dmsr33 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19060. return result;
  19061. }
  19062. /* ===== Inline Function End for DMSR33 ===== */
  19063. /* ===== Inline Function Start for DMXSR33 ===== */
  19064. /**
  19065. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19066. * \brief DMXSR33 (Signed Multiply with Right Shift 33-bit and Cross Multiply with Right Shift 33-bit)
  19067. * \details
  19068. * **Type**: SIMD
  19069. *
  19070. * **Syntax**:\n
  19071. * ~~~
  19072. * DMXSR33 Rd, Rs1, Rs2
  19073. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19074. * ~~~
  19075. *
  19076. * **Purpose**:\n
  19077. * Do two signed 32-bit cross multiplications from the 32-bit elements of two registers, and each multiplications performs a
  19078. * right shift operation.
  19079. *
  19080. * **Description**:\n
  19081. * For the `DMXSR33` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
  19082. * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
  19083. * the top 32-bit Q31 content of 64-bit chunks in Rs2.
  19084. * The Q63 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.
  19085. *
  19086. * **Operations**:\n
  19087. * ~~~
  19088. * Rd.W[0] = (Rs1.W[0] s* Rs2.W[1]) s>> 33
  19089. * Rd.W[1] = (Rs1.W[1] s* Rs2.W[0]) s>> 33
  19090. * ~~~
  19091. *
  19092. * \param [in] a unsigned long long type of value stored in a
  19093. * \param [in] b unsigned long long type of value stored in b
  19094. * \return value stored in unsigned long long type
  19095. */
  19096. __STATIC_FORCEINLINE unsigned long long __RV_DMXSR33(unsigned long long a, unsigned long long b)
  19097. {
  19098. unsigned long long result;
  19099. __ASM volatile("dmxsr33 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19100. return result;
  19101. }
  19102. /* ===== Inline Function End for DMXSR33 ===== */
  19103. /* ===== Inline Function Start for DREDAS16 ===== */
  19104. /**
  19105. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19106. * \brief DREDAS16 (Reduced Addition and Reduced Subtraction)
  19107. * \details
  19108. * **Type**: SIMD
  19109. *
  19110. * **Syntax**:\n
  19111. * ~~~
  19112. * DREDAS16 Rd, Rs1, Rs2
  19113. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19114. * ~~~
  19115. *
  19116. * **Purpose**:\n
  19117. * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
  19118. *
  19119. * **Description**:\n
  19120. * For the `DREDAS16` instruction, subtract the top 16-bit Q15 element from the bottom 16-bit Q15 element of the bottom
  19121. * 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, add the the top16-bit Q15 element with the bottom16-bit
  19122. * Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
  19123. *
  19124. * **Operations**:\n
  19125. * ~~~
  19126. * Rd.H[0] = Rs1.H[0] - Rs1.H[1]
  19127. * Rd.H[1] = Rs1.H[2] + Rs1.H[3]
  19128. * ~~~
  19129. *
  19130. * \param [in] a unsigned long long type of value stored in a
  19131. * \return value stored in unsigned long type
  19132. */
  19133. __STATIC_FORCEINLINE unsigned long __RV_DREDAS16(unsigned long long a)
  19134. {
  19135. unsigned long result;
  19136. __ASM volatile("dredas16 %0, %1" : "=r"(result) : "r"(a));
  19137. return result;
  19138. }
  19139. /* ===== Inline Function End for DREDAS16 ===== */
  19140. /* ===== Inline Function Start for DREDSA16 ===== */
  19141. /**
  19142. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19143. * \brief DREDSA16 (Reduced Subtraction and Reduced Addition)
  19144. * \details
  19145. * **Type**: SIMD
  19146. *
  19147. * **Syntax**:\n
  19148. * ~~~
  19149. * DREDSA16 Rd, Rs1, Rs2
  19150. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19151. * ~~~
  19152. *
  19153. * **Purpose**:\n
  19154. * Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.
  19155. *
  19156. * **Description**:\n
  19157. * For the `DREDSA16` instruction, add the top 16-bit Q15 element from the bottom 16-bit Q15 element of the bottom 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, subtract the the top16-bit Q15 element with the bottom16-bit Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.
  19158. *
  19159. * **Operations**:\n
  19160. * ~~~
  19161. * Rd.H[0] = Rs1.H[0] + Rs1.H[1]
  19162. * Rd.H[1] = Rs1.H[2] - Rs1.H[3]
  19163. * ~~~
  19164. *
  19165. * \param [in] a unsigned long longtype of value stored in a
  19166. * \return value stored in unsigned long type
  19167. */
  19168. __STATIC_FORCEINLINE unsigned long __RV_DREDSA16(unsigned long long a)
  19169. {
  19170. unsigned long result;
  19171. __ASM volatile("dredsa16 %0, %1" : "=r"(result) : "r"(a));
  19172. return result;
  19173. }
  19174. /* ===== Inline Function End for DREDSA16 ===== */
  19175. /* ===== Inline Function Start for DKCLIP64 ===== */
  19176. /**
  19177. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19178. * \brief DKCLIP64 (64-bit Clipped to 16-bit Saturation Value)
  19179. * \details
  19180. * **Type**: SIMD
  19181. *
  19182. * **Syntax**:\n
  19183. * ~~~
  19184. * DKCLIP64 Rd, Rs1, Rs2
  19185. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19186. * ~~~
  19187. *
  19188. * **Purpose**:\n
  19189. * Do 15-bit element arithmetic right shift operations and limit result into 32-bit int,then do saturate operation to 16-bit and
  19190. * clip result to 16-bit Q15.
  19191. *
  19192. * **Description**:\n
  19193. * For the `DKCLIP64` instruction, shift the input 15 bits to the right and data convert the result to 32-bit int type, after
  19194. * which the input is saturated to limit the data to between 2^15-1 and -2^15. the result is converted to 16-bits q15 type. The
  19195. * final results are written to Rd.
  19196. *
  19197. * **Operations**:\n
  19198. * ~~~
  19199. * const int32_t max = (int32_t)((1U << 15U) - 1U);
  19200. * const int32_t min = -1 - max ;
  19201. * int32_t val = (int32_t)(Rs s>> 15);
  19202. * if (val > max) {
  19203. * Rd = max;
  19204. * } else if (val < min) {
  19205. * Rd = min;
  19206. * } else {
  19207. * Rd = (int16_t)val;
  19208. * }
  19209. * ~~~
  19210. *
  19211. * \param [in] a unsigned long long type of value stored in a
  19212. * \return value stored in int16_t type
  19213. */
  19214. __STATIC_FORCEINLINE int16_t __RV_DKCLIP64(unsigned long long a)
  19215. {
  19216. int16_t result;
  19217. __ASM volatile("dkclip64 %0, %1" : "=r"(result) : "r"(a));
  19218. return result;
  19219. }
  19220. /* ===== Inline Function End for DKCLIP64 ===== */
  19221. /* ===== Inline Function Start for DKMDA ===== */
  19222. /**
  19223. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19224. * \brief DKMDA (Signed Multiply Two Halfs and Add)
  19225. * \details
  19226. * **Type**: SIMD
  19227. *
  19228. * **Syntax**:\n
  19229. * ~~~
  19230. * DKMDA Rd, Rs1, Rs2
  19231. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19232. * ~~~
  19233. *
  19234. * **Purpose**:\n
  19235. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
  19236. * The addition result may be saturated.
  19237. *
  19238. * **Description**:\n
  19239. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the
  19240. * 32-bit elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of
  19241. * Rs1 with the top 16-bit content of the 32-bit elements of Rs2.
  19242. * The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1 The final results are
  19243. * written to Rd. The 16-bit contents are treated as signed integers
  19244. *
  19245. * **Operations**:\n
  19246. * ~~~
  19247. * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
  19248. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
  19249. * } else {
  19250. * Rd.W[x] = 0x7fffffff;
  19251. * OV = 1;
  19252. * }
  19253. * x=1...0
  19254. * ~~~
  19255. *
  19256. * \param [in] a unsigned long long type of value stored in a
  19257. * \param [in] b unsigned long long type of value stored in b
  19258. * \return value stored in unsigned long long type
  19259. */
  19260. __STATIC_FORCEINLINE unsigned long long __RV_DKMDA(unsigned long long a, unsigned long long b)
  19261. {
  19262. unsigned long long result;
  19263. __ASM volatile("dkmda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19264. return result;
  19265. }
  19266. /* ===== Inline Function End for DKMDA ===== */
  19267. /* ===== Inline Function Start for DKMXDA ===== */
  19268. /**
  19269. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19270. * \brief DKMXDA (Signed Crossed Multiply Two Halfs and Add)
  19271. * \details
  19272. * **Type**: SIMD
  19273. *
  19274. * **Syntax**:\n
  19275. * ~~~
  19276. * DKMXDA Rd, Rs1, Rs2
  19277. * ~~~
  19278. *
  19279. * **Purpose**:\n
  19280. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together.
  19281. * The addition result may be saturated.
  19282. * * DKMXDA: top*bottom + top*bottom (per 32-bit element)
  19283. *
  19284. * **Description**:\n
  19285. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
  19286. * elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of Rs1
  19287. * with the bottom 16-bit content of the 32-bit elements of Rs2.
  19288. * The addition result is checked for saturation.If saturation happens, the result is saturated to 2^31-1 The final results are
  19289. * written to Rd. The 16-bit contents are treated as signed integers.
  19290. *
  19291. * **Operations**:\n
  19292. * ~~~
  19293. * if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
  19294. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
  19295. * } else {
  19296. * Rd.W[x] = 0x7fffffff;
  19297. * OV = 1;
  19298. * }
  19299. * x=1...0
  19300. * ~~~
  19301. *
  19302. * \param [in] a unsigned long long type of value stored in a
  19303. * \param [in] b unsigned long long type of value stored in b
  19304. * \return value stored in unsigned long long type
  19305. */
  19306. __STATIC_FORCEINLINE unsigned long long __RV_DKMXDA(unsigned long long a, unsigned long long b)
  19307. {
  19308. unsigned long long result;
  19309. __ASM volatile("dkmxda %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19310. return result;
  19311. }
  19312. /* ===== Inline Function End for DKMXDA ===== */
  19313. /* ===== Inline Function Start for DSMDRS ===== */
  19314. /**
  19315. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19316. * \brief DSMDRS (Signed Multiply Two Halfs and Reverse Subtract)
  19317. * \details
  19318. * **Type**: SIMD
  19319. *
  19320. * **Syntax**:\n
  19321. * ~~~
  19322. * DSMDRS Rd, Rs1, Rs2
  19323. * ~~~
  19324. *
  19325. * **Purpose**:\n
  19326. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
  19327. * between the two 32-bit results.
  19328. * * DSMDRS: bottom*bottom - top*top (per 32-bit element)
  19329. *
  19330. * **Description**:\n
  19331. * This instruction multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
  19332. * elements of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of the 32-bit elements
  19333. * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  19334. * The subtraction result is written to the corresponding 32-bit element of Rd (The 16-bit contents of multiplication are
  19335. * treated as signed integers).
  19336. *
  19337. * **Operations**:\n
  19338. * ~~~
  19339. * Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); x = 1...0
  19340. * ~~~
  19341. *
  19342. * \param [in] a unsigned long long type of value stored in a
  19343. * \param [in] b unsigned long long type of value stored in b
  19344. * \return value stored in unsigned long long type
  19345. */
  19346. __STATIC_FORCEINLINE unsigned long long __RV_DSMDRS(unsigned long long a, unsigned long long b)
  19347. {
  19348. unsigned long long result;
  19349. __ASM volatile("dsmdrs %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19350. return result;
  19351. }
  19352. /* ===== Inline Function End for DSMDRS ===== */
  19353. /* ===== Inline Function Start for DSMXDS ===== */
  19354. /**
  19355. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19356. * \brief DSMXDS (Signed Crossed Multiply Two Halfs and Subtract)
  19357. * \details
  19358. * **Type**: SIMD
  19359. *
  19360. * **Syntax**:\n
  19361. * ~~~
  19362. * DSMXDS Rd, Rs1, Rs2
  19363. * ~~~
  19364. *
  19365. * **Purpose**:\n
  19366. * Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation
  19367. * between the two 32-bit results.
  19368. * * DSMXDS: top*bottom - bottom*top (per 32-bit element)
  19369. *
  19370. * **Description**:\n
  19371. * This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit
  19372. * elements of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of the 32-bit elements
  19373. * of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2.
  19374. * The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of multiplication are
  19375. * treated as signed integers.
  19376. *
  19377. * **Operations**:\n
  19378. * ~~~
  19379. * Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); x = 1...0
  19380. * ~~~
  19381. *
  19382. * \param [in] a unsigned long long type of value stored in a
  19383. * \param [in] b unsigned long long type of value stored in b
  19384. * \return value stored in unsigned long long type
  19385. */
  19386. __STATIC_FORCEINLINE unsigned long long __RV_DSMXDS(unsigned long long a, unsigned long long b)
  19387. {
  19388. unsigned long long result;
  19389. __ASM volatile("dsmxds %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19390. return result;
  19391. }
  19392. /* ===== Inline Function End for DSMXDS ===== */
  19393. /* ===== Inline Function Start for DSMBB32 ===== */
  19394. /**
  19395. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19396. * \brief DSMBB32 (Signed Multiply Bottom Word & Bottom Word)
  19397. * \details
  19398. * **Type**: SIMD
  19399. *
  19400. * **Syntax**:\n
  19401. * ~~~
  19402. * DSMBB32 Rd, Rs1, Rs2
  19403. * ~~~
  19404. *
  19405. * **Purpose**:\n
  19406. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register.
  19407. * * DSMBB32: bottom*bottom
  19408. *
  19409. * **Description**:\n
  19410. * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd.
  19411. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19412. *
  19413. * **Operations**:\n
  19414. * ~~~
  19415. * res = (Rs1.W[0] * Rs2.W[0]);
  19416. * Rd = res;
  19417. * ~~~
  19418. *
  19419. * \param [in] a unsigned long long type of value stored in a
  19420. * \param [in] b unsigned long long type of value stored in b
  19421. * \return value stored in long long type
  19422. */
  19423. __STATIC_FORCEINLINE long long __RV_DSMBB32(unsigned long long a, unsigned long long b)
  19424. {
  19425. long long result;
  19426. __ASM volatile("dsmbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19427. return result;
  19428. }
  19429. /* ===== Inline Function End for DSMBB32 ===== */
  19430. /* ===== Inline Function Start for DSMBB32.sra14 ===== */
  19431. /**
  19432. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19433. * \brief DSMBB32.sra14 (Signed Crossed Multiply Two Halfs and Subtract with Right Shift 14)
  19434. * \details
  19435. * **Type**: SIMD
  19436. *
  19437. * **Syntax**:\n
  19438. * ~~~
  19439. * DSMBB32.sra14 Rd, Rs1, Rs2
  19440. * ~~~
  19441. *
  19442. * **Purpose**:\n
  19443. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, arithmetically
  19444. * right shift the product by 14 bits, and finally write the 64-bit result to a third register.
  19445. * * DSMBB32.sra14: bottom*bottom s>> 14
  19446. *
  19447. * **Description**:\n
  19448. * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit.
  19449. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19450. *
  19451. * **Operations**:\n
  19452. * ~~~
  19453. * res = (Rs1.W[0] * Rs2.W[0]) s>> 14;
  19454. * Rd = res;
  19455. * ~~~
  19456. *
  19457. * \param [in] a unsigned long long type of value stored in a
  19458. * \param [in] b unsigned long long type of value stored in b
  19459. * \return value stored in long long type
  19460. */
  19461. __STATIC_FORCEINLINE long long __RV_DSMBB32_SRA14(unsigned long long a, unsigned long long b)
  19462. {
  19463. long long result;
  19464. __ASM volatile("dsmbb32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19465. return result;
  19466. }
  19467. /* ===== Inline Function End for DSMBB32.sra14 ===== */
  19468. /* ===== Inline Function Start for DSMBB32.sra32 ===== */
  19469. /**
  19470. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19471. * \brief DSMBB32.sra32 (Signed Multiply Bottom Word & Bottom Word with Right Shift 32)
  19472. * \details
  19473. * **Type**: SIMD
  19474. *
  19475. * **Syntax**:\n
  19476. * ~~~
  19477. * DSMBB32.sra32 Rd, Rs1, Rs2
  19478. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19479. * ~~~
  19480. *
  19481. * **Purpose**:\n
  19482. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, arithmetically
  19483. * right shift the product by 32 bits, and finally write the 64-bit result to a third register.
  19484. * * DSMBB32.sra32: bottom*bottom s >> 32
  19485. *
  19486. * **Description**:\n
  19487. * This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
  19488. * The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19489. *
  19490. * **Operations**:\n
  19491. * ~~~
  19492. * res = (Rs1.W[0] * Rs2.W[0]) s>> 32;
  19493. * Rd = res;
  19494. * ~~~
  19495. *
  19496. * \param [in] a unsigned long long type of value stored in a
  19497. * \param [in] b unsigned long long type of value stored in b
  19498. * \return value stored in long long type
  19499. */
  19500. __STATIC_FORCEINLINE long long __RV_DSMBB32_SRA32(unsigned long long a, unsigned long long b)
  19501. {
  19502. long long result;
  19503. __ASM volatile("dsmbb32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19504. return result;
  19505. }
  19506. /* ===== Inline Function End for DSMBB32.sra32 ===== */
  19507. /* ===== Inline Function Start for DSMBT32 ===== */
  19508. /**
  19509. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19510. * \brief DSMBT32 (Signed Multiply Bottom Word & Top Word)
  19511. * \details
  19512. * **Type**: SIMD
  19513. *
  19514. * **Syntax**:\n
  19515. * ~~~
  19516. * DSMBT32 Rd, Rs1, Rs2
  19517. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19518. * ~~~
  19519. *
  19520. * **Purpose**:\n
  19521. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
  19522. * result to a third register.
  19523. * * DSMBT32: bottom*top
  19524. *
  19525. * **Description**:\n
  19526. * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19527. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19528. *
  19529. * **Operations**:\n
  19530. * ~~~
  19531. * res = (Rs1.W[0] * Rs2.W[1]);
  19532. * Rd = res;
  19533. * ~~~
  19534. *
  19535. * \param [in] a unsigned long long type of value stored in a
  19536. * \param [in] b unsigned long long type of value stored in b
  19537. * \return value stored in long long type
  19538. */
  19539. __STATIC_FORCEINLINE long long __RV_DSMBT32(unsigned long long a, unsigned long long b)
  19540. {
  19541. long long result;
  19542. __ASM volatile("dsmbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19543. return result;
  19544. }
  19545. /* ===== Inline Function End for DSMBT32 ===== */
  19546. /* ===== Inline Function Start for DSMBT32.sra14 ===== */
  19547. /**
  19548. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19549. * \brief DSMBT32.sra14 (Signed Multiply Bottom Word & Top Word with Right Shift 14)
  19550. * \details
  19551. * **Type**: SIMD
  19552. *
  19553. * **Syntax**:\n
  19554. * ~~~
  19555. * DSMBT32.sra14 Rd, Rs1, Rs2
  19556. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19557. * ~~~
  19558. *
  19559. * **Purpose**:\n
  19560. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, arithmetically
  19561. * right shift the product by 14 bits, and finally write the 64-bit result to a third register.
  19562. * * DSMBT32.sra14: bottom*top s>> 14
  19563. *
  19564. * **Description**:\n
  19565. * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19566. * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19567. *
  19568. * **Operations**:\n
  19569. * ~~~
  19570. * res = (Rs1.W[0] * Rs2.W[1]) s>> 14;
  19571. * Rd = res;
  19572. * ~~~
  19573. *
  19574. * \param [in] a unsigned long long type of value stored in a
  19575. * \param [in] b unsigned long long type of value stored in b
  19576. * \return value stored in long long type
  19577. */
  19578. __STATIC_FORCEINLINE long long __RV_DSMBT32_SRA14(unsigned long long a, unsigned long long b)
  19579. {
  19580. long long result;
  19581. __ASM volatile("dsmbt32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19582. return result;
  19583. }
  19584. /* ===== Inline Function End for DSMBT32.sra14 ===== */
  19585. /* ===== Inline Function Start for DSMBT32.sra32 ===== */
  19586. /**
  19587. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19588. * \brief DSMBT32.sra32 (Signed Multiply Bottom Word & Top Word with Right Shift 32)
  19589. * \details
  19590. * **Type**: SIMD
  19591. *
  19592. * **Syntax**:\n
  19593. * ~~~
  19594. * DSMBT32.sra32 Rd, Rs1, Rs2
  19595. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19596. * ~~~
  19597. *
  19598. * **Purpose**:\n
  19599. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, arithmetically
  19600. * right shift the product by 32 bits, and finally write the 64-bit result to a third register.
  19601. * * DSMBT32.sra32: bottom*top s>> 32
  19602. *
  19603. * **Description**:\n
  19604. * This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19605. * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19606. *
  19607. * **Operations**:\n
  19608. * ~~~
  19609. * res = (Rs1.W[0] * Rs2.W[1]) s>> 32;
  19610. * Rd = res;
  19611. * ~~~
  19612. *
  19613. * \param [in] a unsigned long long type of value stored in a
  19614. * \param [in] b unsigned long long type of value stored in b
  19615. * \return value stored in long long type
  19616. */
  19617. __STATIC_FORCEINLINE long long __RV_DSMBT32_SRA32(unsigned long long a, unsigned long long b)
  19618. {
  19619. long long result;
  19620. __ASM volatile("dsmbt32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19621. return result;
  19622. }
  19623. /* ===== Inline Function End for DSMBT32.sra32 ===== */
  19624. /* ===== Inline Function Start for DSMTT32 ===== */
  19625. /**
  19626. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19627. * \brief DSMTT32 (Signed Multiply Top Word & Top Word)
  19628. * \details
  19629. * **Type**: SIMD
  19630. *
  19631. * **Syntax**:\n
  19632. * ~~~
  19633. * DSMTT32 Rd, Rs1, Rs2
  19634. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19635. * ~~~
  19636. *
  19637. * **Purpose**:\n
  19638. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit
  19639. * result to a third register.
  19640. * * DSMTT32: top*top
  19641. *
  19642. * **Description**:\n
  19643. * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19644. * result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19645. *
  19646. * **Operations**:\n
  19647. * ~~~
  19648. * res = Rs1.W[1] * Rs2.W[1];
  19649. * Rd = res;
  19650. * ~~~
  19651. *
  19652. * \param [in] a unsigned long long type of value stored in a
  19653. * \param [in] b unsigned long long type of value stored in b
  19654. * \return value stored in long long type
  19655. */
  19656. __STATIC_FORCEINLINE long long __RV_DSMTT32(unsigned long long a, unsigned long long b)
  19657. {
  19658. long long result;
  19659. __ASM volatile("dsmtt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19660. return result;
  19661. }
  19662. /* ===== Inline Function End for DSMTT32 ===== */
  19663. /* ===== Inline Function Start for DSMTT32.sra14 ===== */
  19664. /**
  19665. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19666. * \brief DSMTT32.sra14 (Signed Multiply Top Word & Top Word with Right Shift 14-bit)
  19667. * \details
  19668. * **Type**: SIMD
  19669. *
  19670. * **Syntax**:\n
  19671. * ~~~
  19672. * DSMTT32.sra14 Rd, Rs1, Rs2
  19673. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19674. * ~~~
  19675. *
  19676. * **Purpose**:\n
  19677. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register,then right shift 14-bit,
  19678. * finally write the 64-bit result to a third register.
  19679. * * DSMTT32.sra14: top*top s>> 14
  19680. *
  19681. * **Description**:\n
  19682. * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19683. * result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19684. *
  19685. * **Operations**:\n
  19686. * ~~~
  19687. * res = (Rs1.W[1] * Rs2.W[1]) s>> 14;
  19688. * Rd = res;
  19689. * ~~~
  19690. *
  19691. * \param [in] a unsigned long long type of value stored in a
  19692. * \param [in] b unsigned long long type of value stored in b
  19693. * \return value stored in long long type
  19694. */
  19695. __STATIC_FORCEINLINE long long __RV_DSMTT32_SRA14(unsigned long long a, unsigned long long b)
  19696. {
  19697. long long result;
  19698. __ASM volatile("dsmtt32.sra14 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19699. return result;
  19700. }
  19701. /* ===== Inline Function End for DSMTT32.sra14 ===== */
  19702. /* ===== Inline Function Start for DSMTT32.sra32 ===== */
  19703. /**
  19704. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19705. * \brief DSMTT32.sra32 (Signed Multiply Top Word & Top Word with Right Shift 32-bit)
  19706. * \details
  19707. * **Type**: SIMD
  19708. *
  19709. * **Syntax**:\n
  19710. * ~~~
  19711. * DSMTT32.sra32 Rd, Rs1, Rs2
  19712. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19713. * ~~~
  19714. *
  19715. * **Purpose**:\n
  19716. * Multiply the signed 32-bit element of a register with the signed 32-bit element of another register,then right shift 32-bit,
  19717. * finally write the 64-bit result to a third register.
  19718. * * DSMTT32.sra32: top*top s>> 32
  19719. *
  19720. * **Description**:\n
  19721. * This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication
  19722. * result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  19723. *
  19724. * **Operations**:\n
  19725. * ~~~
  19726. * res = (Rs1.W[1] * Rs2.W[1]) s>> 32;
  19727. * Rd = res;
  19728. * ~~~
  19729. *
  19730. * \param [in] a unsigned long long type of value stored in a
  19731. * \param [in] b unsigned long long type of value stored in b
  19732. * \return value stored in long long type
  19733. */
  19734. __STATIC_FORCEINLINE long long __RV_DSMTT32_SRA32(unsigned long long a, unsigned long long b)
  19735. {
  19736. long long result;
  19737. __ASM volatile("dsmtt32.sra32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19738. return result;
  19739. }
  19740. /* ===== Inline Function End for DSMTT32.sra32 ===== */
  19741. /* ===== Inline Function Start for DPKBB32 ===== */
  19742. /**
  19743. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19744. * \brief DPKBB32 (Pack Two 32-bit Data from Both Bottom Half)
  19745. * \details
  19746. * **Type**: SIMD
  19747. *
  19748. * **Syntax**:\n
  19749. * ~~~
  19750. * DPKBB32 Rd, Rs1, Rs2
  19751. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19752. * ~~~
  19753. *
  19754. * **Purpose**:\n
  19755. * Pack 32-bit data from 64-bit chunks in two registers.
  19756. * * DPKBB32: bottom.bottom
  19757. *
  19758. * **Description**:\n
  19759. * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  19760. *
  19761. * **Operations**:\n
  19762. * ~~~
  19763. * Rd = CONCAT(Rs1.W[0], Rs2.W[0]);
  19764. * ~~~
  19765. *
  19766. * \param [in] a unsigned long long type of value stored in a
  19767. * \param [in] b unsigned long long type of value stored in b
  19768. * \return value stored in unsigned long long type
  19769. */
  19770. __STATIC_FORCEINLINE unsigned long long __RV_DPKBB32(unsigned long long a, unsigned long long b)
  19771. {
  19772. unsigned long long result;
  19773. __ASM volatile("dpkbb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19774. return result;
  19775. }
  19776. /* ===== Inline Function End for DPKBB32 ===== */
  19777. /* ===== Inline Function Start for DPKBT32 ===== */
  19778. /**
  19779. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19780. * \brief DPKBT32 (Pack Two 32-bit Data from Bottom and Top Half)
  19781. * \details
  19782. * **Type**: SIMD
  19783. *
  19784. * **Syntax**:\n
  19785. * ~~~
  19786. * DPKBT32 Rd, Rs1, Rs2
  19787. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19788. * ~~~
  19789. *
  19790. * **Purpose**:\n
  19791. * Pack 32-bit data from 64-bit chunks in two registers.
  19792. * * DPKBT32: bottom.top
  19793. *
  19794. * **Description**:\n
  19795. * This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  19796. *
  19797. * **Operations**:\n
  19798. * ~~~
  19799. * Rd = CONCAT(Rs1.W[0], Rs2.W[1]);
  19800. * ~~~
  19801. *
  19802. * \param [in] a unsigned long long type of value stored in a
  19803. * \param [in] b unsigned long long type of value stored in b
  19804. * \return value stored in unsigned long long type
  19805. */
  19806. __STATIC_FORCEINLINE unsigned long long __RV_DPKBT32(unsigned long long a, unsigned long long b)
  19807. {
  19808. unsigned long long result;
  19809. __ASM volatile("dpkbt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19810. return result;
  19811. }
  19812. /* ===== Inline Function End for DPKBT32 ===== */
  19813. /* ===== Inline Function Start for DPKTT32 ===== */
  19814. /**
  19815. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19816. * \brief DPKTT32 (Pack Two 32-bit Data from Both Top Half)
  19817. * \details
  19818. * **Type**: SIMD
  19819. *
  19820. * **Syntax**:\n
  19821. * ~~~
  19822. * DPKTT32 Rd, Rs1, Rs2
  19823. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19824. * ~~~
  19825. *
  19826. * **Purpose**:\n
  19827. * Pack 32-bit data from 64-bit chunks in two registers.
  19828. * * DPKTT32: top.top
  19829. *
  19830. * **Description**:\n
  19831. * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].
  19832. *
  19833. * **Operations**:\n
  19834. * ~~~
  19835. * Rd = CONCAT(Rs1.W[1], Rs2.W[1]);
  19836. * ~~~
  19837. *
  19838. * \param [in] a unsigned long long type of value stored in a
  19839. * \param [in] b unsigned long long type of value stored in b
  19840. * \return value stored in unsigned long long type
  19841. */
  19842. __STATIC_FORCEINLINE unsigned long long __RV_DPKTT32(unsigned long long a, unsigned long long b)
  19843. {
  19844. unsigned long long result;
  19845. __ASM volatile("dpktt32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19846. return result;
  19847. }
  19848. /* ===== Inline Function End for DPKTT32 ===== */
  19849. /* ===== Inline Function Start for DPKTB32 ===== */
  19850. /**
  19851. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19852. * \brief DPKTB32 (Pack Two 32-bit Data from Top and Bottom Half)
  19853. * \details
  19854. * **Type**: SIMD
  19855. *
  19856. * **Syntax**:\n
  19857. * ~~~
  19858. * DPKTB32 Rd, Rs1, Rs2
  19859. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19860. * ~~~
  19861. *
  19862. * **Purpose**:\n
  19863. * Pack 32-bit data from 64-bit chunks in two registers.
  19864. * * DPKTB32: top.bottom
  19865. *
  19866. * **Description**:\n
  19867. * This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].
  19868. *
  19869. * **Operations**:\n
  19870. * ~~~
  19871. * Rd = CONCAT(Rs1.W[1], Rs2.W[0]);
  19872. * ~~~
  19873. *
  19874. * \param [in] a unsigned long long type of value stored in a
  19875. * \param [in] b unsigned long long type of value stored in b
  19876. * \return value stored in unsigned long long type
  19877. */
  19878. __STATIC_FORCEINLINE unsigned long long __RV_DPKTB32(unsigned long long a, unsigned long long b)
  19879. {
  19880. unsigned long long result;
  19881. __ASM volatile("dpktb32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19882. return result;
  19883. }
  19884. /* ===== Inline Function End for DPKTB32 ===== */
  19885. /* ===== Inline Function Start for DPKTB16 ===== */
  19886. /**
  19887. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19888. * \brief DPKTB16 (Pack Two 16-bit Data from Top and Bottom Half)
  19889. * \details
  19890. * **Type**: SIMD
  19891. *
  19892. * **Syntax**:\n
  19893. * ~~~
  19894. * DPKTB16 Rd, Rs1, Rs2
  19895. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19896. * ~~~
  19897. *
  19898. * **Purpose**:\n
  19899. * Pack 16-bit data from 32-bit chunks in two registers.
  19900. * * DPKTB16: top.bottom
  19901. *
  19902. * **Description**:\n
  19903. * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  19904. *
  19905. * **Operations**:\n
  19906. * ~~~
  19907. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]);
  19908. * x=1...0
  19909. * ~~~
  19910. *
  19911. * \param [in] a unsigned long long type of value stored in a
  19912. * \param [in] b unsigned long long type of value stored in b
  19913. * \return value stored in unsigned long long type
  19914. */
  19915. __STATIC_FORCEINLINE unsigned long long __RV_DPKTB16(unsigned long long a, unsigned long long b)
  19916. {
  19917. unsigned long long result;
  19918. __ASM volatile("dpktb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19919. return result;
  19920. }
  19921. /* ===== Inline Function End for DPKTB16 ===== */
  19922. /* ===== Inline Function Start for DPKBB16 ===== */
  19923. /**
  19924. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19925. * \brief DPKBB16 (Pack Two 16-bit Data from Both Bottom Half)
  19926. * \details
  19927. * **Type**: SIMD
  19928. *
  19929. * **Syntax**:\n
  19930. * ~~~
  19931. * DPKBB16 Rd, Rs1, Rs2
  19932. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19933. * ~~~
  19934. *
  19935. * **Purpose**:\n
  19936. * Pack 16-bit data from 32-bit chunks in two registers.
  19937. * * DPKBB16: bottom.bottom
  19938. *
  19939. * **Description**:\n
  19940. * This instruction moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].
  19941. *
  19942. * **Operations**:\n
  19943. * ~~~
  19944. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]);
  19945. * x=1...0
  19946. * ~~~
  19947. *
  19948. * \param [in] a unsigned long long type of value stored in a
  19949. * \param [in] b unsigned long long type of value stored in b
  19950. * \return value stored in unsigned long long type
  19951. */
  19952. __STATIC_FORCEINLINE unsigned long long __RV_DPKBB16(unsigned long long a, unsigned long long b)
  19953. {
  19954. unsigned long long result;
  19955. __ASM volatile("dpkbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19956. return result;
  19957. }
  19958. /* ===== Inline Function End for DPKBB16 ===== */
  19959. /* ===== Inline Function Start for DPKBT16 ===== */
  19960. /**
  19961. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19962. * \brief DPKBT16 (Pack Two 16-bit Data from Bottom and Top Half)
  19963. * \details
  19964. * **Type**: SIMD
  19965. *
  19966. * **Syntax**:\n
  19967. * ~~~
  19968. * DPKBT16 Rd, Rs1, Rs2
  19969. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  19970. * ~~~
  19971. *
  19972. * **Purpose**:\n
  19973. * Pack 16-bit data from 32-bit chunks in two registers.
  19974. * * DPKBT16: bottom.top
  19975. *
  19976. * **Description**:\n
  19977. * This instruction moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  19978. *
  19979. * **Operations**:\n
  19980. * ~~~
  19981. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]);
  19982. * x=1...0
  19983. * ~~~
  19984. *
  19985. * \param [in] a unsigned long long type of value stored in a
  19986. * \param [in] b unsigned long long type of value stored in b
  19987. * \return value stored in unsigned long long type
  19988. */
  19989. __STATIC_FORCEINLINE unsigned long long __RV_DPKBT16(unsigned long long a, unsigned long long b)
  19990. {
  19991. unsigned long long result;
  19992. __ASM volatile("dpkbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  19993. return result;
  19994. }
  19995. /* ===== Inline Function End for DPKBT16 ===== */
  19996. /* ===== Inline Function Start for DPKTT16 ===== */
  19997. /**
  19998. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  19999. * \brief DPKTT16 (Pack Two 16-bit Data from Both Top Half)
  20000. * \details
  20001. * **Type**: SIMD
  20002. *
  20003. * **Syntax**:\n
  20004. * ~~~
  20005. * DPKTT16 Rd, Rs1, Rs2
  20006. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20007. * ~~~
  20008. *
  20009. * **Purpose**:\n
  20010. * Pack 16-bit data from 32-bit chunks in two registers.
  20011. * * DPKTT16: top.top
  20012. *
  20013. * **Description**:\n
  20014. * This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].
  20015. *
  20016. * **Operations**:\n
  20017. * ~~~
  20018. * Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]);
  20019. * x=1...0
  20020. * ~~~
  20021. *
  20022. * \param [in] a unsigned long long type of value stored in a
  20023. * \param [in] b unsigned long long type of value stored in b
  20024. * \return value stored in unsigned long long type
  20025. */
  20026. __STATIC_FORCEINLINE unsigned long long __RV_DPKTT16(unsigned long long a, unsigned long long b)
  20027. {
  20028. unsigned long long result;
  20029. __ASM volatile("dpktt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20030. return result;
  20031. }
  20032. /* ===== Inline Function End for DPKTT16 ===== */
  20033. /* ===== Inline Function Start for DSRA16 ===== */
  20034. /**
  20035. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20036. * \brief DSRA16 (SIMD 16-bit Shift Right Arithmetic)
  20037. * \details
  20038. * **Type**: SIMD
  20039. *
  20040. * **Syntax**:\n
  20041. * ~~~
  20042. * DSRA16 Rd, Rs1, Rs2
  20043. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20044. * ~~~
  20045. *
  20046. * **Purpose**:\n
  20047. * Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a variable from a GPR.
  20048. *
  20049. * **Description**:\n
  20050. * The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the vacated high-order bits are filled with the
  20051. * sign-bit of each data element. The shift amount is specified by the low-order 4-bits of the value in the Rs2 register.
  20052. * The results are written to Rd.
  20053. *
  20054. * **Operations**:\n
  20055. * ~~~
  20056. * sa = Rs2[3:0];
  20057. * if (sa != 0)
  20058. * {
  20059. * Rd.H[x] = SE16(Rs1.H[x][15:sa]);
  20060. * } else {
  20061. * Rd = Rs1;
  20062. * }
  20063. * x=3...0
  20064. * ~~~
  20065. *
  20066. * \param [in] a unsigned long long type of value stored in a
  20067. * \param [in] b unsigned long type of value stored in b
  20068. * \return value stored in unsigned long long type
  20069. */
  20070. __STATIC_FORCEINLINE unsigned long long __RV_DSRA16(unsigned long long a, unsigned long b)
  20071. {
  20072. unsigned long long result;
  20073. __ASM volatile("dsra16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20074. return result;
  20075. }
  20076. /* ===== Inline Function End for DSRA16 ===== */
  20077. /* ===== Inline Function Start for DADD16 ===== */
  20078. /**
  20079. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20080. * \brief DADD16 (16-bit Addition)
  20081. * \details
  20082. * **Type**: SIMD
  20083. *
  20084. * **Syntax**:\n
  20085. * ~~~
  20086. * DADD16 Rd, Rs1, Rs2
  20087. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20088. * ~~~
  20089. *
  20090. * **Purpose**:\n
  20091. * Do 16-bit integer element additions simultaneously.
  20092. *
  20093. * **Description**:\n
  20094. * This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit unsigned integer elements in Rs2. And
  20095. * the results are written to Rd.
  20096. *
  20097. * **Operations**:\n
  20098. * ~~~
  20099. * Rd.H[x] = Rs1.H[x] + Rs2.H[x];
  20100. * x=3...0
  20101. * ~~~
  20102. *
  20103. * \param [in] a unsigned long long type of value stored in a
  20104. * \param [in] b unsigned long long type of value stored in b
  20105. * \return value stored in unsigned long long type
  20106. */
  20107. __STATIC_FORCEINLINE unsigned long long __RV_DADD16(unsigned long long a, unsigned long long b)
  20108. {
  20109. unsigned long long result;
  20110. __ASM volatile("dadd16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20111. return result;
  20112. }
  20113. /* ===== Inline Function End for DADD16 ===== */
  20114. /* ===== Inline Function Start for DADD32 ===== */
  20115. /**
  20116. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20117. * \brief DADD32 (32-bit Addition)
  20118. * \details
  20119. * **Type**: SIMD
  20120. *
  20121. * **Syntax**:\n
  20122. * ~~~
  20123. * DADD32 Rd, Rs1, Rs2
  20124. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20125. * ~~~
  20126. *
  20127. * **Purpose**:\n
  20128. * Do 32-bit integer element additions simultaneously.
  20129. *
  20130. * **Description**:\n
  20131. * This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer elements in Rs2, and then writes the 32-bit
  20132. * element results to Rd.
  20133. *
  20134. * **Operations**:\n
  20135. * ~~~
  20136. * Rd.W[x] = Rs1.W[x] + Rs2.W[x];
  20137. * x=1...0
  20138. * ~~~
  20139. *
  20140. * \param [in] a unsigned long long type of value stored in a
  20141. * \param [in] b unsigned long long type of value stored in b
  20142. * \return value stored in unsigned long long type
  20143. */
  20144. __STATIC_FORCEINLINE unsigned long long __RV_DADD32(unsigned long long a, unsigned long long b)
  20145. {
  20146. unsigned long long result;
  20147. __ASM volatile("dadd32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20148. return result;
  20149. }
  20150. /* ===== Inline Function End for DADD32 ===== */
  20151. /* ===== Inline Function Start for DSMBB16 ===== */
  20152. /**
  20153. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20154. * \brief DSMBB16 (Signed Multiply Bottom Half & Bottom Half)
  20155. * \details
  20156. * **Type**: SIMD
  20157. *
  20158. * **Syntax**:\n
  20159. * ~~~
  20160. * DSMBB16 Rd, Rs1, Rs2
  20161. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20162. * ~~~
  20163. *
  20164. * **Purpose**:\n
  20165. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements
  20166. * of another register and write the result to a third register.
  20167. * * DSMBB16: W[x].bottom*W[x].bottom
  20168. *
  20169. * **Description**:\n
  20170. * For the `DSMBB16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom
  20171. * 16-bit content of the 32-bit elements of Rs2.
  20172. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
  20173. *
  20174. * **Operations**:\n
  20175. * ~~~
  20176. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0];
  20177. * x=1...0
  20178. * ~~~
  20179. *
  20180. * \param [in] a unsigned long long type of value stored in a
  20181. * \param [in] b unsigned long long type of value stored in b
  20182. * \return value stored in unsigned long long type
  20183. */
  20184. __STATIC_FORCEINLINE unsigned long long __RV_DSMBB16(unsigned long long a, unsigned long long b) /* pass */
  20185. {
  20186. unsigned long long result;
  20187. __ASM volatile("dsmbb16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20188. return result;
  20189. }
  20190. /* ===== Inline Function End for DSMBB16 ===== */
  20191. /* ===== Inline Function Start for DSMBT16 ===== */
  20192. /**
  20193. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20194. * \brief DSMBT16 (Signed Multiply Bottom Half & Top Half)
  20195. * \details
  20196. * **Type**: SIMD
  20197. *
  20198. * **Syntax**:\n
  20199. * ~~~
  20200. * DSMBT16 Rd, Rs1, Rs2
  20201. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20202. * ~~~
  20203. *
  20204. * **Purpose**:\n
  20205. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
  20206. * elements of another register and write the result to a third register.
  20207. * * DSMBT16: W[x].bottom *W[x].top
  20208. *
  20209. * **Description**:\n
  20210. * For the `DSMBT16` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  20211. * content of the 32-bit elements of Rs2.
  20212. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
  20213. *
  20214. * **Operations**:\n
  20215. * ~~~
  20216. * Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1];
  20217. * x=1...0
  20218. * ~~~
  20219. *
  20220. * \param [in] a unsigned long long type of value stored in a
  20221. * \param [in] b unsigned long long type of value stored in b
  20222. * \return value stored in unsigned long long type
  20223. */
  20224. __STATIC_FORCEINLINE unsigned long long __RV_DSMBT16(unsigned long long a, unsigned long long b) /* pass */
  20225. {
  20226. unsigned long long result;
  20227. __ASM volatile("dsmbt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20228. return result;
  20229. }
  20230. /* ===== Inline Function End for DSMBT16 ===== */
  20231. /* ===== Inline Function Start for DSMTT16 ===== */
  20232. /**
  20233. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20234. * \brief DSMTT16 (Signed Multiply Top Half & Top Half)
  20235. * \details
  20236. * **Type**: SIMD
  20237. *
  20238. * **Syntax**:\n
  20239. * ~~~
  20240. * DSMTT16 Rd, Rs1, Rs2
  20241. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20242. * ~~~
  20243. *
  20244. * **Purpose**:\n
  20245. * Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit
  20246. * elements of another register and write the result to a third register.
  20247. * * DSMTT16: W[x].top * W[x].top
  20248. *
  20249. * **Description**:\n
  20250. * For the `DSMTT16` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  20251. * content of the 32-bit elements of Rs2.
  20252. * The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.
  20253. *
  20254. * **Operations**:\n
  20255. * ~~~
  20256. * Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1];
  20257. * x=1...0
  20258. * ~~~
  20259. *
  20260. * \param [in] a unsigned long long type of value stored in a
  20261. * \param [in] b unsigned long long type of value stored in b
  20262. * \return value stored in unsigned long long type
  20263. */
  20264. __STATIC_FORCEINLINE unsigned long long __RV_DSMTT16(unsigned long long a, unsigned long long b)
  20265. {
  20266. unsigned long long result;
  20267. __ASM volatile("dsmtt16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20268. return result;
  20269. }
  20270. /* ===== Inline Function End for DSMTT16 ===== */
  20271. /* ===== Inline Function Start for DRCRSA16 ===== */
  20272. /**
  20273. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20274. * \brief DRCRSA16 (16-bit Signed Halving Cross Subtraction & Addition)
  20275. * \details
  20276. * **Type**: SIMD
  20277. *
  20278. * **Syntax**:\n
  20279. * ~~~
  20280. * DRCRSA16 Rd, Rs1, Rs2
  20281. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20282. * ~~~
  20283. *
  20284. * **Purpose**:\n
  20285. * Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously.
  20286. * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
  20287. *
  20288. * **Description**:\n
  20289. * This instruction subtracts the 16-bit signed integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer in
  20290. * [31:16] of 32-bit chunks in Rs1, and adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 to the 16-bit signed
  20291. * integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and then
  20292. * written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for addition.
  20293. *
  20294. * **Operations**:\n
  20295. * ~~~
  20296. * Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
  20297. * Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
  20298. * x=1...0
  20299. * ~~~
  20300. *
  20301. * \param [in] a unsigned long long type of value stored in a
  20302. * \param [in] b unsigned long long type of value stored in b
  20303. * \return value stored in unsigned long long type
  20304. */
  20305. __STATIC_FORCEINLINE unsigned long long __RV_DRCRSA16(unsigned long long a, unsigned long long b)
  20306. {
  20307. unsigned long long result;
  20308. __ASM volatile("drcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20309. return result;
  20310. }
  20311. /* ===== Inline Function End for DRCRSA16 ===== */
  20312. /* ===== Inline Function Start for DRCRSA32 ===== */
  20313. /**
  20314. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20315. * \brief DRCRSA32 (32-bit Signed Halving Cross Subtraction & Addition)
  20316. * \details
  20317. * **Type**: SIMD
  20318. *
  20319. * **Syntax**:\n
  20320. * ~~~
  20321. * DRCRSA32 Rd, Rs1, Rs2
  20322. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20323. * ~~~
  20324. *
  20325. * **Purpose**:\n
  20326. * Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in a 64-bit chunk simultaneously.
  20327. * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
  20328. *
  20329. * **Description**:\n
  20330. * This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the 32-bit signed integer element in
  20331. * [63:32] of Rs1, and adds the 32-bit signed integer element in [63:32] of Rs2 to the 32-bit signed integer element in [31:0]
  20332. * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction
  20333. * and [31:0] of Rd for addition.
  20334. *
  20335. * **Operations**:\n
  20336. * ~~~
  20337. * Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
  20338. * Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;
  20339. * ~~~
  20340. *
  20341. * \param [in] a unsigned long long type of value stored in a
  20342. * \param [in] b unsigned long long type of value stored in b
  20343. * \return value stored in unsigned long long type
  20344. */
  20345. __STATIC_FORCEINLINE unsigned long long __RV_DRCRSA32(unsigned long long a, unsigned long long b)
  20346. {
  20347. unsigned long long result;
  20348. __ASM volatile("drcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20349. return result;
  20350. }
  20351. /* ===== Inline Function End for DRCRSA32 ===== */
  20352. /* ===== Inline Function Start for DRCRAS16 ===== */
  20353. /**
  20354. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20355. * \brief DRCRAS16 (16-bit Signed Halving Cross Addition & Subtraction)
  20356. * \details
  20357. * **Type**: SIMD
  20358. *
  20359. * **Syntax**:\n
  20360. * ~~~
  20361. * DRCRAS16 Rd, Rs1, Rs2
  20362. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20363. * ~~~
  20364. *
  20365. * **Purpose**:\n
  20366. * Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in a 32-bit chunk simultaneously.
  20367. * Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.
  20368. *
  20369. * **Description**:\n
  20370. * This instruction adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer in
  20371. * [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit
  20372. * signed integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and then
  20373. * written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for subtraction.
  20374. *
  20375. * **Operations**:\n
  20376. * ~~~
  20377. * Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
  20378. * Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
  20379. * x=1...0
  20380. * ~~~
  20381. *
  20382. * \param [in] a unsigned long long type of value stored in a
  20383. * \param [in] b unsigned long long type of value stored in b
  20384. * \return value stored in unsigned long long type
  20385. */
  20386. __STATIC_FORCEINLINE unsigned long long __RV_DRCRAS16(unsigned long long a, unsigned long long b)
  20387. {
  20388. unsigned long long result;
  20389. __ASM volatile("drcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20390. return result;
  20391. }
  20392. /* ===== Inline Function End for DRCRAS16 ===== */
  20393. /* ===== Inline Function Start for DRCRAS32 ===== */
  20394. /**
  20395. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20396. * \brief DRCRAS32 (32-bit Signed Halving Cross Addition & Subtraction)
  20397. * \details
  20398. * **Type**: SIMD
  20399. *
  20400. * **Syntax**:\n
  20401. * ~~~
  20402. * DRCRAS32 Rd, Rs1, Rs2
  20403. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20404. * ~~~
  20405. *
  20406. * **Purpose**:\n
  20407. * Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in a 64-bit chunk simultaneously.
  20408. * Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.
  20409. *
  20410. * **Description**:\n
  20411. * This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in [31:0]
  20412. * of Rs2, and subtracts the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0]
  20413. * of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition
  20414. * and [31:0] of Rd for subtraction.
  20415. *
  20416. * **Operations**:\n
  20417. * ~~~
  20418. * Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
  20419. * Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;
  20420. * ~~~
  20421. *
  20422. * \param [in] a unsigned long long type of value stored in a
  20423. * \param [in] b unsigned long long type of value stored in b
  20424. * \return value stored in unsigned long long type
  20425. */
  20426. __STATIC_FORCEINLINE unsigned long long __RV_DRCRAS32(unsigned long long a, unsigned long long b)
  20427. {
  20428. unsigned long long result;
  20429. __ASM volatile("DRCRAS32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20430. return result;
  20431. }
  20432. /* ===== Inline Function End for DRCRAS32 ===== */
  20433. /* ===== Inline Function Start for DKCRAS16 ===== */
  20434. /**
  20435. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20436. * \brief DKCRAS16 (16-bit Signed Saturating Cross Addition & Subtraction)
  20437. * \details
  20438. * **Type**: SIMD
  20439. *
  20440. * **Syntax**:\n
  20441. * ~~~
  20442. * DKCRAS16 Rd, Rs1, Rs2
  20443. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20444. * ~~~
  20445. *
  20446. * **Purpose**:\n
  20447. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
  20448. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  20449. *
  20450. * **Description**:\n
  20451. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
  20452. * element in [15:0] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [31:16] of
  20453. * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20454. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20455. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
  20456. * in Rd for subtraction.
  20457. *
  20458. * **Operations**:\n
  20459. * ~~~
  20460. * res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
  20461. * res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
  20462. * for (res in [res1, res2]) {
  20463. * if (res > (2^15)-1) {
  20464. * res = (2^15)-1;
  20465. * OV = 1;
  20466. * } else if (res < -2^15) {
  20467. * res = -2^15;
  20468. * OV = 1;
  20469. * }
  20470. * }
  20471. * Rd.W[x][31:16] = res1;
  20472. * Rd.W[x][15:0] = res2;
  20473. * x=1...0
  20474. * ~~~
  20475. *
  20476. * \param [in] a unsigned long long type of value stored in a
  20477. * \param [in] b unsigned long long type of value stored in b
  20478. * \return value stored in unsigned long long type
  20479. */
  20480. __STATIC_FORCEINLINE unsigned long long __RV_DKCRAS16(unsigned long long a, unsigned long long b)
  20481. {
  20482. unsigned long long result;
  20483. __ASM volatile("dkcras16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20484. return result;
  20485. }
  20486. /* ===== Inline Function End for DKCRAS16 ===== */
  20487. /* ===== Inline Function Start for DKCRSA16 ===== */
  20488. /**
  20489. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20490. * \brief DKCRSA16 (16-bit Signed Saturating Cross Subtraction & Addition)
  20491. * \details
  20492. * **Type**: SIMD
  20493. *
  20494. * **Syntax**:\n
  20495. * ~~~
  20496. * DKCRSA16 Rd, Rs1, Rs2
  20497. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20498. * ~~~
  20499. *
  20500. * **Purpose**:\n
  20501. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
  20502. * chunk simultaneously. Operands are from crossed positions in 32-bit chunks.
  20503. *
  20504. * **Description**:\n
  20505. * This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer
  20506. * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [31:16] of 32-bit
  20507. * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20508. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20509. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
  20510. * in Rd for addition.
  20511. *
  20512. * **Operations**:\n
  20513. * ~~~
  20514. * res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
  20515. * res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
  20516. * for (res in [res1, res2]) {
  20517. * if (res > (2^15)-1) {
  20518. * res = (2^15)-1;
  20519. * OV = 1;
  20520. * } else if (res < -2^15) {
  20521. * res = -2^15;
  20522. * OV = 1;
  20523. * }
  20524. * }
  20525. * Rd.W[x][31:16] = res1;
  20526. * Rd.W[x][15:0] = res2;
  20527. * x=1...0
  20528. * ~~~
  20529. *
  20530. * \param [in] a unsigned long long type of value stored in a
  20531. * \param [in] b unsigned long long type of value stored in b
  20532. * \return value stored in unsigned long long type
  20533. */
  20534. __STATIC_FORCEINLINE unsigned long long __RV_DKCRSA16(unsigned long long a, unsigned long long b)
  20535. {
  20536. unsigned long long result;
  20537. __ASM volatile("dkcrsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20538. return result;
  20539. }
  20540. /* ===== Inline Function End for DKCRSA16 ===== */
  20541. /* ===== Inline Function Start for DRSUB16 ===== */
  20542. /**
  20543. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20544. * \brief DRSUB16 (16-bit Signed Halving Subtraction)
  20545. * \details
  20546. * **Type**: SIMD
  20547. *
  20548. * **Syntax**:\n
  20549. * ~~~
  20550. * DRSUB16 Rd, Rs1, Rs2
  20551. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20552. * ~~~
  20553. *
  20554. * **Purpose**:\n
  20555. * Do 16-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
  20556. *
  20557. * **Description**:\n
  20558. * This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The
  20559. * results are first arithmetically right-shifted by 1 bit and then written to Rd.
  20560. *
  20561. * **Operations**:\n
  20562. * ~~~
  20563. * Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
  20564. * x=3...0
  20565. * ~~~
  20566. *
  20567. * \param [in] a unsigned long long type of value stored in a
  20568. * \param [in] b unsigned long long type of value stored in b
  20569. * \return value stored in unsigned long long type
  20570. */
  20571. __STATIC_FORCEINLINE unsigned long long __RV_DRSUB16(unsigned long long a, unsigned long long b)
  20572. {
  20573. unsigned long long result;
  20574. __ASM volatile("drsub16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20575. return result;
  20576. }
  20577. /* ===== Inline Function End for DRSUB16 ===== */
  20578. /* ===== Inline Function Start for DSTSA32 ===== */
  20579. /**
  20580. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20581. * \brief DSTSA32 (32-bit Straight Subtraction & Addition)
  20582. * \details
  20583. * **Type**: SIMD
  20584. *
  20585. * **Syntax**:\n
  20586. * ~~~
  20587. * DSTSA32 Rd, Rs1, Rs2
  20588. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20589. * ~~~
  20590. *
  20591. * **Purpose**:\n
  20592. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
  20593. * from corresponding 32-bit elements.
  20594. *
  20595. * **Description**:\n
  20596. * This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [63:32] of Rs1,
  20597. * and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit
  20598. * integer element in [31:0] of Rs2, and writes the result to [31:0] of Rd.
  20599. *
  20600. * **Operations**:\n
  20601. * ~~~
  20602. * Rd.W[1] = Rs1.W[1] - Rs2.W[1];
  20603. * Rd.W[0] = Rs1.W[0] + Rs2.W[0];
  20604. * ~~~
  20605. *
  20606. * \param [in] a unsigned long long type of value stored in a
  20607. * \param [in] b unsigned long long type of value stored in b
  20608. * \return value stored in unsigned long long type
  20609. */
  20610. __STATIC_FORCEINLINE unsigned long long __RV_DSTSA32(unsigned long long a, unsigned long long b)
  20611. {
  20612. unsigned long long result;
  20613. __ASM volatile("dstsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20614. return result;
  20615. }
  20616. /* ===== Inline Function End for DSTSA32 ===== */
  20617. /* ===== Inline Function Start for DSTAS32 ===== */
  20618. /**
  20619. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20620. * \brief DSTAS32 (SIMD 32-bit Straight Addition & Subtraction)
  20621. * \details
  20622. * **Type**: SIMD
  20623. *
  20624. * **Syntax**:\n
  20625. * ~~~
  20626. * DSTAS32 Rd, Rs1, Rs2
  20627. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20628. * ~~~
  20629. *
  20630. * **Purpose**:\n
  20631. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
  20632. * from corresponding 32-bit elements.
  20633. *
  20634. * **Description**:\n
  20635. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [63:32] of Rs2,
  20636. * and writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [31:0] of Rs2
  20637. * from the 32-bit integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
  20638. *
  20639. * **Operations**:\n
  20640. * ~~~
  20641. * Rd.W[1] = Rs1.W[1] + Rs2.W[1];
  20642. * Rd.W[0] = Rs1.W[0] - Rs2.W[0];
  20643. * ~~~
  20644. *
  20645. * \param [in] a unsigned long long type of value stored in a
  20646. * \param [in] b unsigned long long type of value stored in b
  20647. * \return value stored in unsigned long long type
  20648. */
  20649. __STATIC_FORCEINLINE unsigned long long __RV_DSTAS32(unsigned long long a, unsigned long long b)
  20650. {
  20651. unsigned long long result;
  20652. __ASM volatile("DSTAS32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20653. return result;
  20654. }
  20655. /* ===== Inline Function End for DSTAS32 ===== */
  20656. /* ===== Inline Function Start for DKCRSA32 ===== */
  20657. /**
  20658. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20659. * \brief DKCRSA32 (32-bit Signed Saturating Cross Subtraction & Addition)
  20660. * \details
  20661. * **Type**: SIMD
  20662. *
  20663. * **Syntax**:\n
  20664. * ~~~
  20665. * DKCRSA32 Rd, Rs1, Rs2
  20666. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20667. * ~~~
  20668. *
  20669. * **Purpose**:\n
  20670. * Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element saturating addition in a 64-bit
  20671. * chunk simultaneously. Operands are from crossed 32-bit elements.
  20672. *
  20673. * **Description**:\n
  20674. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1; at
  20675. * the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2. If any
  20676. * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
  20677. * set to 1. The saturated results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.
  20678. *
  20679. * **Operations**:\n
  20680. * ~~~
  20681. * res[1] = Rs1.W[1] - Rs2.W[0];
  20682. * res[0] = Rs1.W[0] + Rs2.W[1];
  20683. * if (res[x] > (2^31)-1) {
  20684. * res[x] = (2^31)-1;
  20685. * OV = 1;
  20686. * } else if (res[x] < -2^31) {
  20687. * res[x] = -2^31;
  20688. * OV = 1;
  20689. * }
  20690. * Rd.W[1] = res[1];
  20691. * Rd.W[0] = res[0];
  20692. * ~~~
  20693. *
  20694. * \param [in] a unsigned long long type of value stored in a
  20695. * \param [in] b unsigned long long type of value stored in b
  20696. * \return value stored in unsigned long long type
  20697. */
  20698. __STATIC_FORCEINLINE unsigned long long __RV_DKCRSA32(unsigned long long a, unsigned long long b)
  20699. {
  20700. unsigned long long result;
  20701. __ASM volatile("dkcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20702. return result;
  20703. }
  20704. /* ===== Inline Function End for DKCRSA32 ===== */
  20705. /* ===== Inline Function Start for DKCRAS32 ===== */
  20706. /**
  20707. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20708. * \brief DKCRAS32 (32-bit Signed Saturating Cross Addition & Subtraction)
  20709. * \details
  20710. * **Type**: SIMD
  20711. *
  20712. * **Syntax**:\n
  20713. * ~~~
  20714. * DKCRAS32 Rd, Rs1, Rs2
  20715. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20716. * ~~~
  20717. *
  20718. * **Purpose**:\n
  20719. * Do 32-bit signed integer element saturating addition and 32-bit signed integer element saturating subtraction in a 64-bit
  20720. * chunk simultaneously. Operands are from crossed 32-bit elements.
  20721. *
  20722. * **Description**:\n
  20723. * This instruction adds the 32-bit integer element in [31:0] of Rs2 to the 32-bit integer element in [63:32] of Rs1; at the
  20724. * same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1. If any
  20725. * of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is
  20726. * set to 1. The saturated results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.
  20727. *
  20728. * **Operations**:\n
  20729. * ~~~
  20730. * res[1] = Rs1.W[1] + Rs2.W[0];
  20731. * res[0] = Rs1.W[0] - Rs2.W[1];
  20732. * if (res[x] > (2^31)-1) {
  20733. * res[x] = (2^31)-1;
  20734. * OV = 1;
  20735. * } else if (res[x] < -2^31) {
  20736. * res[x] = -2^31;
  20737. * OV = 1;
  20738. * }
  20739. * Rd.W[1] = res[1];
  20740. * Rd.W[0] = res[0];
  20741. * ~~~
  20742. *
  20743. * \param [in] a unsigned long long type of value stored in a
  20744. * \param [in] b unsigned long long type of value stored in b
  20745. * \return value stored in unsigned long long type
  20746. */
  20747. __STATIC_FORCEINLINE unsigned long long __RV_DKCRAS32(unsigned long long a, unsigned long long b)
  20748. {
  20749. unsigned long long result;
  20750. __ASM volatile("dkcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20751. return result;
  20752. }
  20753. /* ===== Inline Function End for DKCRAS32 ===== */
  20754. /* ===== Inline Function Start for DCRSA32 ===== */
  20755. /**
  20756. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20757. * \brief DCRSA32 (32-bit Cross Subtraction & Addition)
  20758. * \details
  20759. * **Type**: SIMD
  20760. *
  20761. * **Syntax**:\n
  20762. * ~~~
  20763. * DCRSA32 Rd, Rs1, Rs2
  20764. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20765. * ~~~
  20766. *
  20767. * **Purpose**:\n
  20768. * Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are
  20769. * from crossed 32-bit elements.
  20770. *
  20771. * **Description**:\n
  20772. * This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1, and
  20773. * writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [63:32] of Rs2 with the 32-bit
  20774. * integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
  20775. *
  20776. * **Operations**:\n
  20777. * ~~~
  20778. * Rd.W[1] = Rs1.W[1] - Rs2.W[0];
  20779. * Rd.W[0] = Rs1.W[0] + Rs2.W[1];
  20780. * ~~~
  20781. *
  20782. * \param [in] a unsigned long long type of value stored in a
  20783. * \param [in] b unsigned long long type of value stored in b
  20784. * \return value stored in unsigned long long type
  20785. */
  20786. __STATIC_FORCEINLINE unsigned long long __RV_DCRSA32(unsigned long long a, unsigned long long b)
  20787. {
  20788. unsigned long long result;
  20789. __ASM volatile("dcrsa32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20790. return result;
  20791. }
  20792. /* ===== Inline Function End for DCRSA32 ===== */
  20793. /* ===== Inline Function Start for DCRAS32 ===== */
  20794. /**
  20795. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20796. * \brief DCRAS32 (32-bit Cross Addition & Subtraction)
  20797. * \details
  20798. * **Type**: SIMD
  20799. *
  20800. * **Syntax**:\n
  20801. * ~~~
  20802. * DCRAS32 Rd, Rs1, Rs2
  20803. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20804. * ~~~
  20805. *
  20806. * **Purpose**:\n
  20807. * Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are
  20808. * from crossed 32-bit elements.
  20809. *
  20810. * **Description**:\n
  20811. * This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and
  20812. * writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit
  20813. * integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.
  20814. *
  20815. * **Operations**:\n
  20816. * ~~~
  20817. * Rd.W[1] = Rs1.W[1] + Rs2.W[0];
  20818. * Rd.W[0] = Rs1.W[0] - Rs2.W[1];
  20819. * ~~~
  20820. *
  20821. * \param [in] a unsigned long long type of value stored in a
  20822. * \param [in] b unsigned long long type of value stored in b
  20823. * \return value stored in unsigned long long type
  20824. */
  20825. __STATIC_FORCEINLINE unsigned long long __RV_DCRAS32(unsigned long long a, unsigned long long b)
  20826. {
  20827. unsigned long long result;
  20828. __ASM volatile("dcras32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20829. return result;
  20830. }
  20831. /* ===== Inline Function End for DCRAS32 ===== */
  20832. /* ===== Inline Function Start for DKSTSA16 ===== */
  20833. /**
  20834. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20835. * \brief DKSTSA16 (16-bit Signed Saturating Straight Subtraction & Addition)
  20836. * \details
  20837. * **Type**: SIMD
  20838. *
  20839. * **Syntax**:\n
  20840. * ~~~
  20841. * DKSTSA16 Rd, Rs1, Rs2
  20842. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20843. * ~~~
  20844. *
  20845. * **Purpose**:\n
  20846. * Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit
  20847. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  20848. *
  20849. * **Description**:\n
  20850. * This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer
  20851. * element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [15:0] of 32-bit
  20852. * chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20853. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20854. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks
  20855. * in Rd for addition.
  20856. *
  20857. * **Operations**:\n
  20858. * ~~~
  20859. * res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
  20860. * res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
  20861. * for (res in [res1, res2]) {
  20862. * if (res > (2^15)-1) {
  20863. * res = (2^15)-1;
  20864. * OV = 1;
  20865. * } else if (res < -2^15) {
  20866. * res = -2^15;
  20867. * OV = 1;
  20868. * }
  20869. * }
  20870. * Rd.W[x][31:16] = res1;
  20871. * Rd.W[x][15:0] = res2;
  20872. * x=1...0
  20873. * ~~~
  20874. *
  20875. * \param [in] a unsigned long long type of value stored in a
  20876. * \param [in] b unsigned long long type of value stored in b
  20877. * \return value stored in unsigned long long type
  20878. */
  20879. __STATIC_FORCEINLINE unsigned long long __RV_DKSTSA16(unsigned long long a, unsigned long long b)
  20880. {
  20881. unsigned long long result;
  20882. __ASM volatile("dkstsa16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20883. return result;
  20884. }
  20885. /* ===== Inline Function End for DKSTSA16 ===== */
  20886. /* ===== Inline Function Start for DKSTAS16 ===== */
  20887. /**
  20888. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20889. * \brief DKSTAS16 (16-bit Signed Saturating Straight Addition & Subtraction)
  20890. * \details
  20891. * **Type**: SIMD
  20892. *
  20893. * **Syntax**:\n
  20894. * ~~~
  20895. * DKSTAS16 Rd, Rs1, Rs2
  20896. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  20897. * ~~~
  20898. *
  20899. * **Purpose**:\n
  20900. * Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit
  20901. * chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.
  20902. *
  20903. * **Description**:\n
  20904. * This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer
  20905. * element in [31:16] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [15:0] of
  20906. * 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1.
  20907. * If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV
  20908. * bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks
  20909. * in Rd for subtraction.
  20910. *
  20911. * **Operations**:\n
  20912. * ~~~
  20913. * res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
  20914. * res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
  20915. * for (res in [res1, res2]) {
  20916. * if (res > (2^15)-1) {
  20917. * res = (2^15)-1;
  20918. * OV = 1;
  20919. * } else if (res < -2^15) {
  20920. * res = -2^15;
  20921. * OV = 1;
  20922. * }
  20923. * }
  20924. * Rd.W[x][31:16] = res1;
  20925. * Rd.W[x][15:0] = res2;
  20926. * x=1...0
  20927. * ~~~
  20928. *
  20929. * \param [in] a unsigned long long type of value stored in a
  20930. * \param [in] b unsigned long long type of value stored in b
  20931. * \return value stored in unsigned long long type
  20932. */
  20933. __STATIC_FORCEINLINE unsigned long long __RV_DKSTAS16(unsigned long long a, unsigned long long b)
  20934. {
  20935. unsigned long long result;
  20936. __ASM volatile("dkstas16 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  20937. return result;
  20938. }
  20939. /* ===== Inline Function End for DKSTAS16 ===== */
  20940. /* ===== Inline Function Start for DSCLIP8 ===== */
  20941. /**
  20942. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20943. * \brief DSCLIP8 (8-bit Signed Saturation and Clip)
  20944. * \details
  20945. * **Type**: SIMD
  20946. *
  20947. * **Syntax**:\n
  20948. * ~~~
  20949. * DSCLIP8 Rd, Rs1, imm3u[2:0]
  20950. * # Rd, Rs1 are all even/odd pair of registers
  20951. * ~~~
  20952. *
  20953. * **Purpose**:\n
  20954. * Limit the 8-bit signed integer elements of a register into a signed range simultaneously.
  20955. *
  20956. * **Description**:\n
  20957. * This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm3u and
  20958. * 2^imm3u-1, and writes the limited results to Rd. For example, if imm3u is 3, the 8-bit input values should be saturated
  20959. * between 7 and -8. If saturation is performed, set OV bit to 1.
  20960. *
  20961. * **Operations**:\n
  20962. * ~~~
  20963. * src = Rs1.B[x];
  20964. * if (src > (2^imm3u)-1) {
  20965. * src = (2^imm3u)-1;
  20966. * OV = 1;
  20967. * } else if (src < -2^imm3u) {
  20968. * src = -2^imm3u;
  20969. * OV = 1;
  20970. * }
  20971. * Rd.B[x] = src
  20972. * x=7...0
  20973. * ~~~
  20974. *
  20975. * \param [in] a unsigned long long type of value stored in a
   * \param [in] b    immediate clip exponent imm3u; must be an integer constant in the range 0..7
  20977. * \return value stored in unsigned long long type
  20978. */
  #define __RV_DSCLIP8(a, b) \
      ({ \
          /* Evaluate (a) before declaring the output temporary, so that a caller */ \
          /* variable which happens to share a macro-local name is not shadowed.  */ \
          unsigned long long __rv_dsclip8_rs1 = (unsigned long long)(a); \
          unsigned long long __rv_dsclip8_rd; \
          /* (b) is passed through the "K" immediate constraint, so it must be an */ \
          /* integer constant expression; it is encoded into the instruction.    */ \
          __ASM volatile("dsclip8 %0, %1, %2" : "=r"(__rv_dsclip8_rd) : "r"(__rv_dsclip8_rs1), "K"(b)); \
          __rv_dsclip8_rd; \
      })
  20986. /* ===== Inline Function End for DSCLIP8 ===== */
  20987. /* ===== Inline Function Start for DSCLIP16 ===== */
  20988. /**
  20989. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  20990. * \brief DSCLIP16 (16-bit Signed Saturation and Clip)
  20991. * \details
  20992. * **Type**: SIMD
  20993. *
  20994. * **Syntax**:\n
  20995. * ~~~
  20996. * DSCLIP16 Rd, Rs1, imm4u[3:0]
  20997. * # Rd, Rs1 are all even/odd pair of registers
  20998. * ~~~
  20999. *
  21000. * **Purpose**:\n
  21001. * Limit the 16-bit signed integer elements of a register into a signed range simultaneously.
  21002. *
  21003. * **Description**:\n
  21004. * This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm4u and
   * 2^imm4u-1, and writes the limited results to Rd. For example, if imm4u is 3, the 16-bit input values should be saturated
  21006. * between 7 and -8. If saturation is performed, set OV bit to 1.
  21007. *
  21008. * **Operations**:\n
  21009. * ~~~
  21010. * src = Rs1.H[x];
  21011. * if (src > (2^imm4u)-1) {
  21012. * src = (2^imm4u)-1;
  21013. * OV = 1;
  21014. * } else if (src < -2^imm4u) {
  21015. * src = -2^imm4u;
  21016. * OV = 1;
  21017. * }
  21018. * Rd.H[x] = src
  21019. * x=3...0
  21020. * ~~~
  21021. *
  21022. * \param [in] a unsigned long long type of value stored in a
   * \param [in] b    immediate clip exponent imm4u; must be an integer constant in the range 0..15
  21024. * \return value stored in unsigned long long type
  21025. */
  #define __RV_DSCLIP16(a, b) \
      ({ \
          /* Evaluate (a) before declaring the output temporary, so that a caller */ \
          /* variable which happens to share a macro-local name is not shadowed.  */ \
          unsigned long long __rv_dsclip16_rs1 = (unsigned long long)(a); \
          unsigned long long __rv_dsclip16_rd; \
          /* (b) is passed through the "K" immediate constraint, so it must be an */ \
          /* integer constant expression; it is encoded into the instruction.    */ \
          __ASM volatile("dsclip16 %0, %1, %2" : "=r"(__rv_dsclip16_rd) : "r"(__rv_dsclip16_rs1), "K"(b)); \
          __rv_dsclip16_rd; \
      })
  21033. /* ===== Inline Function End for DSCLIP16 ===== */
  21034. /* ===== Inline Function Start for DSCLIP32 ===== */
  21035. /**
  21036. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21037. * \brief DSCLIP32 (32-bit Signed Saturation and Clip)
  21038. * \details
  21039. * **Type**: SIMD
  21040. *
  21041. * **Syntax**:\n
  21042. * ~~~
  21043. * DSCLIP32 Rd, Rs1, imm5u[4:0]
  21044. * # Rd, Rs1 are all even/odd pair of registers
  21045. * ~~~
  21046. *
  21047. * **Purpose**:\n
  21048. * Limit the 32-bit signed integer elements of a register into a signed range simultaneously.
  21049. *
  21050. * **Description**:\n
  21051. * This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm5u and
  21052. * 2^imm5u-1, and writes the limited results to Rd. For example, if imm5u is 3, the 32-bit input values should be saturated
  21053. * between 7 and -8. If saturation is performed, set OV bit to 1.
  21054. *
  21055. * **Operations**:\n
  21056. * ~~~
  21057. * src = Rs1.W[x];
  21058. * if (src > (2^imm5u)-1) {
  21059. * src = (2^imm5u)-1;
  21060. * OV = 1;
  21061. * } else if (src < -2^imm5u) {
  21062. * src = -2^imm5u;
  21063. * OV = 1;
  21064. * }
  21065. * Rd.W[x] = src
  21066. * x=1...0
  21067. * ~~~
  21068. *
  21069. * \param [in] a unsigned long long type of value stored in a
   * \param [in] b    immediate clip exponent imm5u; must be an integer constant in the range 0..31
  21071. * \return value stored in unsigned long long type
  21072. */
  #define __RV_DSCLIP32(a, b) \
      ({ \
          /* Evaluate (a) before declaring the output temporary, so that a caller */ \
          /* variable which happens to share a macro-local name is not shadowed.  */ \
          unsigned long long __rv_dsclip32_rs1 = (unsigned long long)(a); \
          unsigned long long __rv_dsclip32_rd; \
          /* (b) is passed through the "K" immediate constraint, so it must be an */ \
          /* integer constant expression; it is encoded into the instruction.    */ \
          __ASM volatile("dsclip32 %0, %1, %2" : "=r"(__rv_dsclip32_rd) : "r"(__rv_dsclip32_rs1), "K"(b)); \
          __rv_dsclip32_rd; \
      })
  21080. /* ===== Inline Function End for DSCLIP32 ===== */
  21081. /* ===== Inline Function Start for DRSUB32 ===== */
  21082. /**
  21083. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21084. * \brief DRSUB32 (32-bit Signed Halving Subtraction)
  21085. * \details
  21086. * **Type**: SIMD
  21087. *
  21088. * **Syntax**:\n
  21089. * ~~~
  21090. * DRSUB32 Rd, Rs1, Rs2
  21091. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21092. * ~~~
  21093. *
  21094. * **Purpose**:\n
  21095. * Do 32-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.
  21096. *
  21097. * **Description**:\n
  21098. * This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The
  21099. * results are first arithmetically right-shifted by 1 bit and then written to Rd.
  21100. *
  21101. * **Operations**:\n
  21102. * ~~~
  21103. * Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
  21104. * x=1...0
  21105. * ~~~
  21106. *
  21107. * \param [in] a unsigned long long type of value stored in a
  21108. * \param [in] b unsigned long long type of value stored in b
  21109. * \return value stored in unsigned long long type
  21110. */
  21111. __STATIC_FORCEINLINE unsigned long long __RV_DRSUB32(unsigned long long a, unsigned long long b)
  21112. {
  21113. unsigned long long result;
  21114. __ASM volatile("drsub32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  21115. return result;
  21116. }
  21117. /* ===== Inline Function End for DRSUB32 ===== */
  21118. /* ===== Inline Function Start for DPACK32 ===== */
  21119. /**
  21120. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21121. * \brief DPACK32 (SIMD Pack Two 32-bit Data To 64-bit)
  21122. * \details
  21123. * **Type**: SIMD
  21124. *
  21125. * **Syntax**:\n
  21126. * ~~~
  21127. * DPACK32 Rd, Rs1, Rs2
  21128. * # Rd is even/odd pair of register
  21129. * ~~~
  21130. *
  21131. * **Purpose**:\n
   * Pack two 32-bit values taken from two registers into one 64-bit value.
  21133. *
  21134. * **Description**:\n
  21135. * This instruction moves 32-bit Rs1 to Rd.W[1] and moves 32-bit Rs2 to Rd.W[0].
  21136. *
  21137. * **Operations**:\n
  21138. * ~~~
  21139. * Rd = CONCAT(Rs1.W , Rs2.W);
  21140. * ~~~
  21141. *
  21142. * \param [in] a signed long type of value stored in a
  21143. * \param [in] b signed long type of value stored in b
  21144. * \return value stored in unsigned long long type
  21145. */
  21146. __STATIC_FORCEINLINE unsigned long long __RV_DPACK32(signed long a, signed long b)
  21147. {
  21148. unsigned long long result;
  21149. __ASM volatile("dpack32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  21150. return result;
  21151. }
  21152. /* ===== Inline Function End for DPACK32 ===== */
  21153. /* ===== Inline Function Start for DSUNPKD810 ===== */
  21154. /**
  21155. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21156. * \brief DSUNPKD810 (Signed Unpacking Bytes 1 & 0)
  21157. * \details
  21158. * **Type**: SIMD
  21159. *
  21160. * **Syntax**:\n
  21161. * ~~~
  21162. * DSUNPKD810 Rd, Rs1
  21163. * # Rd, Rs1 are all even/odd pair of registers
  21164. * ~~~
  21165. *
  21166. * **Purpose**:\n
  21167. * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21168. *
  21169. * **Description**:\n
  21170. * For the `DSUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21171. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21172. *
  21173. * **Operations**:\n
  21174. * ~~~
  21175. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[1])
  21176. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21177. * ~~~
  21178. *
  21179. * \param [in] a unsigned long long type of value stored in a
  21180. * \return value stored in unsigned long long type
  21181. */
  21182. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD810(unsigned long long a)
  21183. {
  21184. unsigned long long result;
  21185. __ASM volatile("dsunpkd810 %0, %1" : "=r"(result) : "r"(a));
  21186. return result;
  21187. }
  21188. /* ===== Inline Function End for DSUNPKD810 ===== */
  21189. /* ===== Inline Function Start for DSUNPKD820 ===== */
  21190. /**
  21191. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21192. * \brief DSUNPKD820 (Signed Unpacking Bytes 2 & 0)
  21193. * \details
  21194. * **Type**: SIMD
  21195. *
  21196. * **Syntax**:\n
  21197. * ~~~
  21198. * DSUNPKD820 Rd, Rs1
  21199. * # Rd, Rs1 are all even/odd pair of registers
  21200. * ~~~
  21201. *
  21202. * **Purpose**:\n
  21203. * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21204. *
  21205. * **Description**:\n
  21206. * For the `DSUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21207. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21208. *
  21209. * **Operations**:\n
  21210. * ~~~
  21211. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[2])
  21212. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21213. * ~~~
  21214. *
  21215. * \param [in] a unsigned long long type of value stored in a
  21216. * \return value stored in unsigned long long type
  21217. */
  21218. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD820(unsigned long long a)
  21219. {
  21220. unsigned long long result;
  21221. __ASM volatile("dsunpkd820 %0, %1" : "=r"(result) : "r"(a));
  21222. return result;
  21223. }
  21224. /* ===== Inline Function End for DSUNPKD820 ===== */
  21225. /* ===== Inline Function Start for DSUNPKD830 ===== */
  21226. /**
  21227. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21228. * \brief DSUNPKD830 (Signed Unpacking Bytes 3 & 0)
  21229. * \details
  21230. * **Type**: SIMD
  21231. *
  21232. * **Syntax**:\n
  21233. * ~~~
  21234. * DSUNPKD830 Rd, Rs1
  21235. * # Rd, Rs1 are all even/odd pair of registers
  21236. * ~~~
  21237. *
  21238. * **Purpose**:\n
  21239. * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21240. *
  21241. * **Description**:\n
  21242. * For the `DSUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21243. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21244. *
  21245. * **Operations**:\n
  21246. * ~~~
  21247. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21248. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])
  21249. * ~~~
  21250. *
  21251. * \param [in] a unsigned long long type of value stored in a
  21252. * \return value stored in unsigned long long type
  21253. */
  21254. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD830(unsigned long long a)
  21255. {
  21256. unsigned long long result;
  21257. __ASM volatile("dsunpkd830 %0, %1" : "=r"(result) : "r"(a));
  21258. return result;
  21259. }
  21260. /* ===== Inline Function End for DSUNPKD830 ===== */
  21261. /* ===== Inline Function Start for DSUNPKD831 ===== */
  21262. /**
  21263. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21264. * \brief DSUNPKD831 (Signed Unpacking Bytes 3 & 1)
  21265. * \details
  21266. * **Type**: SIMD
  21267. *
  21268. * **Syntax**:\n
  21269. * ~~~
  21270. * DSUNPKD831 Rd, Rs1
  21271. * # Rd, Rs1 are all even/odd pair of registers
  21272. * ~~~
  21273. *
  21274. * **Purpose**:\n
  21275. * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21276. *
  21277. * **Description**:\n
  21278. * For the `DSUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21279. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21280. *
  21281. * **Operations**:\n
  21282. * ~~~
  21283. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21284. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[1])
  21285. * ~~~
  21286. *
  21287. * \param [in] a unsigned long long type of value stored in a
  21288. * \return value stored in unsigned long long type
  21289. */
  21290. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD831(unsigned long long a)
  21291. {
  21292. unsigned long long result;
  21293. __ASM volatile("dsunpkd831 %0, %1" : "=r"(result) : "r"(a));
  21294. return result;
  21295. }
  21296. /* ===== Inline Function End for DSUNPKD831 ===== */
  21297. /* ===== Inline Function Start for DSUNPKD832 ===== */
  21298. /**
  21299. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21300. * \brief DSUNPKD832 (Signed Unpacking Bytes 3 & 2)
  21301. * \details
  21302. * **Type**: SIMD
  21303. *
  21304. * **Syntax**:\n
  21305. * ~~~
  21306. * DSUNPKD832 Rd, Rs1
  21307. * # Rd, Rs1 are all even/odd pair of registers
  21308. * ~~~
  21309. *
  21310. * **Purpose**:\n
  21311. * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.
  21312. *
  21313. * **Description**:\n
  21314. * For the `DSUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit signed halfwords
  21315. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21316. *
  21317. * **Operations**:\n
  21318. * ~~~
  21319. * Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
  21320. * Rd.W[m].H[0] = SE16(Rs1.W[m].B[2])
  21321. * ~~~
  21322. *
  21323. * \param [in] a unsigned long long type of value stored in a
  21324. * \return value stored in unsigned long long type
  21325. */
  21326. __STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD832(unsigned long long a)
  21327. {
  21328. unsigned long long result;
  21329. __ASM volatile("dsunpkd832 %0, %1" : "=r"(result) : "r"(a));
  21330. return result;
  21331. }
  21332. /* ===== Inline Function End for DSUNPKD832 ===== */
  21333. /* ===== Inline Function Start for DZUNPKD810 ===== */
  21334. /**
  21335. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21336. * \brief DZUNPKD810 (UnSigned Unpacking Bytes 1 & 0)
  21337. * \details
  21338. * **Type**: SIMD
  21339. *
  21340. * **Syntax**:\n
  21341. * ~~~
  21342. * DZUNPKD810 Rd, Rs1
  21343. * # Rd, Rs1 are all even/odd pair of registers
  21344. * ~~~
  21345. *
  21346. * **Purpose**:\n
  21347. * Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21348. *
  21349. * **Description**:\n
  21350. * For the `DZUNPKD810` instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21351. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21352. *
  21353. * **Operations**:\n
  21354. * ~~~
   * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[1])
   * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
  21357. * ~~~
  21358. *
  21359. * \param [in] a unsigned long long type of value stored in a
  21360. * \return value stored in unsigned long long type
  21361. */
  21362. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD810(unsigned long long a)
  21363. {
  21364. unsigned long long result;
  21365. __ASM volatile("dzunpkd810 %0, %1" : "=r"(result) : "r"(a));
  21366. return result;
  21367. }
  21368. /* ===== Inline Function End for DZUNPKD810 ===== */
  21369. /* ===== Inline Function Start for DZUNPKD820 ===== */
  21370. /**
  21371. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21372. * \brief DZUNPKD820 (UnSigned Unpacking Bytes 2 & 0)
  21373. * \details
  21374. * **Type**: SIMD
  21375. *
  21376. * **Syntax**:\n
  21377. * ~~~
  21378. * DZUNPKD820 Rd, Rs1
  21379. * # Rd, Rs1 are all even/odd pair of registers
  21380. * ~~~
  21381. *
  21382. * **Purpose**:\n
  21383. * Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21384. *
  21385. * **Description**:\n
  21386. * For the `DZUNPKD820` instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21387. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21388. *
  21389. * **Operations**:\n
  21390. * ~~~
   * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[2])
   * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
  21393. * ~~~
  21394. *
  21395. * \param [in] a unsigned long long type of value stored in a
  21396. * \return value stored in unsigned long long type
  21397. */
  21398. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD820(unsigned long long a)
  21399. {
  21400. unsigned long long result;
  21401. __ASM volatile("dzunpkd820 %0, %1" : "=r"(result) : "r"(a));
  21402. return result;
  21403. }
  21404. /* ===== Inline Function End for DZUNPKD820 ===== */
  21405. /* ===== Inline Function Start for DZUNPKD830 ===== */
  21406. /**
  21407. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21408. * \brief DZUNPKD830 (UnSigned Unpacking Bytes 3 & 0)
  21409. * \details
  21410. * **Type**: SIMD
  21411. *
  21412. * **Syntax**:\n
  21413. * ~~~
  21414. * DZUNPKD830 Rd, Rs1
  21415. * # Rd, Rs1 are all even/odd pair of registers
  21416. * ~~~
  21417. *
  21418. * **Purpose**:\n
  21419. * Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21420. *
  21421. * **Description**:\n
  21422. * For the `DZUNPKD830` instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21423. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21424. *
  21425. * **Operations**:\n
  21426. * ~~~
   * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
   * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])
  21429. * ~~~
  21430. *
  21431. * \param [in] a unsigned long long type of value stored in a
  21432. * \return value stored in unsigned long long type
  21433. */
  21434. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD830(unsigned long long a)
  21435. {
  21436. unsigned long long result;
  21437. __ASM volatile("dzunpkd830 %0, %1" : "=r"(result) : "r"(a));
  21438. return result;
  21439. }
  21440. /* ===== Inline Function End for DZUNPKD830 ===== */
  21441. /* ===== Inline Function Start for DZUNPKD831 ===== */
  21442. /**
  21443. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21444. * \brief DZUNPKD831 (UnSigned Unpacking Bytes 3 & 1)
  21445. * \details
  21446. * **Type**: SIMD
  21447. *
  21448. * **Syntax**:\n
  21449. * ~~~
  21450. * DZUNPKD831 Rd, Rs1
  21451. * # Rd, Rs1 are all even/odd pair of registers
  21452. * ~~~
  21453. *
  21454. * **Purpose**:\n
  21455. * Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21456. *
  21457. * **Description**:\n
  21458. * For the `DZUNPKD831` instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21459. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21460. *
  21461. * **Operations**:\n
  21462. * ~~~
   * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
   * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[1])
  21465. * ~~~
  21466. *
  21467. * \param [in] a unsigned long long type of value stored in a
  21468. * \return value stored in unsigned long long type
  21469. */
  21470. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD831(unsigned long long a)
  21471. {
  21472. unsigned long long result;
  21473. __ASM volatile("dzunpkd831 %0, %1" : "=r"(result) : "r"(a));
  21474. return result;
  21475. }
  21476. /* ===== Inline Function End for DZUNPKD831 ===== */
  21477. /* ===== Inline Function Start for DZUNPKD832 ===== */
  21478. /**
  21479. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N2
  21480. * \brief DZUNPKD832 (UnSigned Unpacking Bytes 3 & 2)
  21481. * \details
  21482. * **Type**: SIMD
  21483. *
  21484. * **Syntax**:\n
  21485. * ~~~
  21486. * DZUNPKD832 Rd, Rs1
  21487. * # Rd, Rs1 are all even/odd pair of registers
  21488. * ~~~
  21489. *
  21490. * **Purpose**:\n
  21491. * Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.
  21492. *
  21493. * **Description**:\n
  21494. * For the `DZUNPKD832` instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords
  21495. * and writes the results to the top part and the bottom part of 32-bit chunks in Rd.
  21496. *
  21497. * **Operations**:\n
  21498. * ~~~
   * Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
   * Rd.W[m].H[0] = ZE16(Rs1.W[m].B[2])
  21501. * ~~~
  21502. *
  21503. * \param [in] a unsigned long long type of value stored in a
  21504. * \return value stored in unsigned long long type
  21505. */
  21506. __STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD832(unsigned long long a)
  21507. {
  21508. unsigned long long result;
  21509. __ASM volatile("dzunpkd832 %0, %1" : "=r"(result) : "r"(a));
  21510. return result;
  21511. }
  21512. /* ===== Inline Function End for DZUNPKD832 ===== */
  21513. /* ===== Inline Function Start for DKMMAC ===== */
  21514. /**
  21515. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21516. * \brief DKMMAC (64-bit MSW 32x32 Signed Multiply and Saturating Add)
  21517. * \details
  21518. * **Type**: SIMD
  21519. *
  21520. * **Syntax**:\n
  21521. * ~~~
  21522. * DKMMAC Rd, Rs1, Rs2
  21523. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21524. * ~~~
  21525. *
  21526. * **Purpose**:\n
  21527. * Do MSW 32x32 element signed multiplications and saturating addition simultaneously. The results are written into Rd.
  21528. *
  21529. * **Description**:\n
  21530. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21531. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  21532. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  21533. * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21534. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21535. * adding a 1 to bit 31 of the results.
  21536. *
  21537. * **Operations**:\n
  21538. * ~~~
  21539. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21540. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21541. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21542. * res = sat.q31(dop + (aop s* bop)[63:32]);
  21543. * }
  21544. * Rd = concat(rest, resb);
  21545. * x=0
  21546. * ~~~
  21547. *
  21548. * \param [in] t unsigned long long type of value stored in t
  21549. * \param [in] a unsigned long long type of value stored in a
  21550. * \param [in] b unsigned long long type of value stored in b
  21551. * \return value stored in unsigned long long type
  21552. */
  21553. __STATIC_FORCEINLINE unsigned long long __RV_DKMMAC(unsigned long long t, unsigned long long a, unsigned long long b)
  21554. {
  21555. __ASM volatile("dkmmac %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21556. return t;
  21557. }
  21558. /* ===== Inline Function End for DKMMAC ===== */
  21559. /* ===== Inline Function Start for DKMMAC.u ===== */
  21560. /**
  21561. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
   * \brief       DKMMAC.u (64-bit MSW 32x32 Signed Multiply with Rounding and Saturating Add)
  21563. * \details
  21564. * **Type**: SIMD
  21565. *
  21566. * **Syntax**:\n
  21567. * ~~~
  21568. * DKMMAC.u Rd, Rs1, Rs2
  21569. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21570. * ~~~
  21571. *
  21572. * **Purpose**:\n
   * Do MSW 32x32 element signed multiplications with rounding and saturating addition simultaneously. The results are written into Rd.
  21574. *
  21575. * **Description**:\n
  21576. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21577. * and adds the most significant 32-bit multiplication results with the signed 32-bit elements of Rd. If
  21578. * the addition result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the range
  21579. * and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21580. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21581. * adding a 1 to bit 31 of the results.
  21582. *
  21583. * **Operations**:\n
  21584. * ~~~
  21585. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21586. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21587. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
   * res = sat.q31(dop + RUND(aop s* bop)[63:32]);
  21589. * }
  21590. * Rd = concat(rest, resb);
  21591. * x=0
  21592. * ~~~
  21593. *
  21594. * \param [in] t unsigned long long type of value stored in t
  21595. * \param [in] a unsigned long long type of value stored in a
  21596. * \param [in] b unsigned long long type of value stored in b
  21597. * \return value stored in unsigned long long type
  21598. */
  21599. __STATIC_FORCEINLINE unsigned long long __RV_DKMMAC_U(unsigned long long t, unsigned long long a, unsigned long long b)
  21600. {
  21601. __ASM volatile("dkmmac.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21602. return t;
  21603. }
  21604. /* ===== Inline Function End for DKMMAC.u ===== */
  21605. /* ===== Inline Function Start for DKMMSB ===== */
  21606. /**
  21607. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21608. * \brief DKMMSB (64-bit MSW 32x32 Signed Multiply and Saturating Sub)
  21609. * \details
  21610. * **Type**: SIMD
  21611. *
  21612. * **Syntax**:\n
  21613. * ~~~
  21614. * DKMMSB Rd, Rs1, Rs2
  21615. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21616. * ~~~
  21617. *
  21618. * **Purpose**:\n
  21619. * Do MSW 32x32 element signed multiplications and saturating subtraction simultaneously. The results are written into Rd.
  21620. *
  21621. * **Description**:\n
  21622. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21623. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  21624. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  21625. * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21626. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21627. * adding a 1 to bit 31 of the results.
  21628. *
  21629. * **Operations**:\n
  21630. * ~~~
  21631. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21632. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21633. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21634. * res = sat.q31(dop - (aop s* bop)[63:32]);
  21635. * }
  21636. * Rd = concat(rest, resb);
  21637. * x=0
  21638. * ~~~
  21639. *
  21640. * \param [in] t unsigned long long type of value stored in t
  21641. * \param [in] a unsigned long long type of value stored in a
  21642. * \param [in] b unsigned long long type of value stored in b
  21643. * \return value stored in unsigned long long type
  21644. */
  21645. __STATIC_FORCEINLINE unsigned long long __RV_DKMMSB(unsigned long long t, unsigned long long a, unsigned long long b)
  21646. {
  21647. __ASM volatile("dkmmsb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21648. return t;
  21649. }
  21650. /* ===== Inline Function End for DKMMSB ===== */
  21651. /* ===== Inline Function Start for DKMMSB.u ===== */
  21652. /**
  21653. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
   * \brief       DKMMSB.u (64-bit MSW 32x32 Signed Multiply with Rounding and Saturating Sub)
  21655. * \details
  21656. * **Type**: SIMD
  21657. *
  21658. * **Syntax**:\n
  21659. * ~~~
  21660. * DKMMSB.u Rd, Rs1, Rs2
  21661. * # Rd, Rs1, Rs2 are all even/odd pair of registers
  21662. * ~~~
  21663. *
  21664. * **Purpose**:\n
   * Do MSW 32x32 element signed multiplications with rounding and saturating subtraction simultaneously. The results are written into Rd.
  21666. *
  21667. * **Description**:\n
  21668. * This instruction multiplies the signed 32-bit elements of Rs1 with the signed 32-bit elements of Rs2
  21669. * and subtracts the most significant 32-bit multiplication results from the signed 32-bit elements of
  21670. * Rd. If the subtraction result is beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), it is saturated to the
  21671. * range and the OV bit is set to 1. The results after saturation are written to Rd. The .u form of the
  21672. * instruction additionally rounds up the most significant 32-bit of the 64-bit multiplication results by
  21673. * adding a 1 to bit 31 of the results.
  21674. *
  21675. * **Operations**:\n
  21676. * ~~~
  21677. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21678. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21679. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
   * res = sat.q31(dop - RUND(aop s* bop)[63:32]);
  21681. * }
  21682. * Rd = concat(rest, resb);
  21683. * x=0
  21684. * ~~~
  21685. *
  21686. * \param [in] t unsigned long long type of value stored in t
  21687. * \param [in] a unsigned long long type of value stored in a
  21688. * \param [in] b unsigned long long type of value stored in b
  21689. * \return value stored in unsigned long long type
  21690. */
  21691. __STATIC_FORCEINLINE unsigned long long __RV_DKMMSB_U(unsigned long long t, unsigned long long a, unsigned long long b)
  21692. {
  21693. __ASM volatile("dkmmsb.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21694. return t;
  21695. }
  21696. /* ===== Inline Function End for DKMMSB.u ===== */
  21697. /* ===== Inline Function Start for DKMADA ===== */
  21698. /**
  21699. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21700. * \brief DKMADA (Saturating Signed Multiply Two Halfs and Two Adds)
  21701. * \details
  21702. * **Type**: DSP
  21703. *
  21704. * **Syntax**:\n
  21705. * ~~~
  21706. * DKMADA Rd, Rs1, Rs2
  21707. * ~~~
  21708. *
  21709. * **Purpose**:\n
  21710. * Do two 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
  21711. *
  21712. * **Description**:\n
  21713. * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  21714. * the bottom 16-bit content of 32-bit elements in Rs2 and then adds the result to the result of
  21715. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  21716. * elements in Rs2. The sum is accumulated into the matching 32-bit element of Rd with Q31 saturation.
  21717. *
  21718. * **Operations**:\n
  21719. * ~~~
  21720. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21721. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21722. *
  21723. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21724. * mul1 = aop.H[1] s* bop.H[1];
  21725. * mul2 = aop.H[0] s* bop.H[0];
  21726. * res = sat.q31(dop + mul1 + mul2);
  21727. * }
  21728. * Rd = concat(rest, resb);
  21729. * x=0
  21730. * ~~~
  21731. *
  21732. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  21733. * \param [in] a unsigned long long source operand bound to Rs1
  21734. * \param [in] b unsigned long long source operand bound to Rs2
  21735. * \return value of Rd after the instruction (the updated t), unsigned long long type
  21736. */
  21737. __STATIC_FORCEINLINE unsigned long long __RV_DKMADA(unsigned long long t, unsigned long long a, unsigned long long b)
  21738. {
  21739. __ASM volatile("dkmada %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21740. return t;
  21741. }
  21742. /* ===== Inline Function End for DKMADA ===== */
  21743. /* ===== Inline Function Start for DKMAXDA ===== */
  21744. /**
  21745. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21746. * \brief DKMAXDA (Two Cross 16x16 with 32-bit Signed Double Add)
  21747. * \details
  21748. * **Type**: DSP
  21749. *
  21750. * **Syntax**:\n
  21751. * ~~~
  21752. * DKMAXDA Rd, Rs1, Rs2
  21753. * ~~~
  21754. *
  21755. * **Purpose**:\n
  21756. * Do two cross 16x16 with 32-bit signed double addition simultaneously. The results are written into Rd.
  21757. *
  21758. * **Description**:\n
  21759. * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of 32-bit
  21760. * elements in Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of
  21761. * 32-bit elements in Rs1 with the top 16-bit content of 32-bit elements in Rs2.
  21762. *
  21763. * **Operations**:\n
  21764. * ~~~
  21765. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21766. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21767. *
  21768. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21769. * mul1 = aop.H[1] s* bop.H[0];
  21770. * mul2 = aop.H[0] s* bop.H[1];
  21771. * res = sat.q31(dop + mul1 + mul2);
  21772. * }
  21773. * Rd = concat(rest, resb);
  21774. * x=0
  21775. * ~~~
  21776. *
  21777. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  21778. * \param [in] a unsigned long long source operand bound to Rs1
  21779. * \param [in] b unsigned long long source operand bound to Rs2
  21780. * \return value of Rd after the instruction (the updated t), unsigned long long type
  21781. */
  21782. __STATIC_FORCEINLINE unsigned long long __RV_DKMAXDA(unsigned long long t, unsigned long long a, unsigned long long b)
  21783. {
  21784. __ASM volatile("dkmaxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21785. return t;
  21786. }
  21787. /* ===== Inline Function End for DKMAXDA ===== */
  21788. /* ===== Inline Function Start for DKMADS ===== */
  21789. /**
  21790. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21791. * \brief DKMADS (Two 16x16 with 32-bit Signed Add and Sub)
  21792. * \details
  21793. * **Type**: DSP
  21794. *
  21795. * **Syntax**:\n
  21796. * ~~~
  21797. * DKMADS Rd, Rs1, Rs2
  21798. * ~~~
  21799. *
  21800. * **Purpose**:\n
  21801. * Do two 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
  21802. *
  21803. * **Description**:\n
  21804. * It multiplies the bottom 16-bit content of 32-bit elements in Rs1 with
  21805. * the bottom 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  21806. * multiplying the top 16-bit content of 32-bit elements in Rs1 with the top 16-bit content of 32-bit
  21807. * elements in Rs2. The difference is accumulated into the matching 32-bit element of Rd with Q31 saturation.
  21808. *
  21809. * **Operations**:\n
  21810. * ~~~
  21811. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21812. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21813. *
  21814. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21815. * mul1 = aop.H[1] s* bop.H[1];
  21816. * mul2 = aop.H[0] s* bop.H[0];
  21817. * res = sat.q31(dop + mul1 - mul2);
  21818. * }
  21819. * Rd = concat(rest, resb);
  21820. * x=0
  21821. * ~~~
  21822. *
  21823. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  21824. * \param [in] a unsigned long long source operand bound to Rs1
  21825. * \param [in] b unsigned long long source operand bound to Rs2
  21826. * \return value of Rd after the instruction (the updated t), unsigned long long type
  21827. */
  21828. __STATIC_FORCEINLINE unsigned long long __RV_DKMADS(unsigned long long t, unsigned long long a, unsigned long long b)
  21829. {
  21830. __ASM volatile("dkmads %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21831. return t;
  21832. }
  21833. /* ===== Inline Function End for DKMADS ===== */
  21834. /* ===== Inline Function Start for DKMADRS ===== */
  21835. /**
  21836. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21837. * \brief DKMADRS (Two 16x16 with 32-bit Signed Add and Reversed Sub)
  21838. * \details
  21839. * **Type**: DSP
  21840. *
  21841. * **Syntax**:\n
  21842. * ~~~
  21843. * DKMADRS Rd, Rs1, Rs2
  21844. * ~~~
  21845. *
  21846. * **Purpose**:\n
  21847. * Do two 16x16 with 32-bit signed addition and reversed subtraction simultaneously. The results are written into Rd.
  21848. *
  21849. * **Description**:\n
  21850. * It multiplies the top 16-bit content of 32-bit elements in Rs1 with the
  21851. * top 16-bit content of 32-bit elements in Rs2 and then subtracts the result from the result of
  21852. * multiplying the bottom 16-bit content of 32-bit elements in Rs1 with the bottom 16-bit content of
  21853. * 32-bit elements in Rs2.
  21854. *
  21855. * **Operations**:\n
  21856. * ~~~
  21857. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21858. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21859. *
  21860. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21861. * mul1 = aop.H[1] s* bop.H[1];
  21862. * mul2 = aop.H[0] s* bop.H[0];
  21863. * res = sat.q31(dop - mul1 + mul2);
  21864. * }
  21865. * Rd = concat(rest, resb);
  21866. * x=0
  21867. * ~~~
  21868. *
  21869. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  21870. * \param [in] a unsigned long long source operand bound to Rs1
  21871. * \param [in] b unsigned long long source operand bound to Rs2
  21872. * \return value of Rd after the instruction (the updated t), unsigned long long type
  21873. */
  21874. __STATIC_FORCEINLINE unsigned long long __RV_DKMADRS(unsigned long long t, unsigned long long a, unsigned long long b)
  21875. {
  21876. __ASM volatile("dkmadrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21877. return t;
  21878. }
  21879. /* ===== Inline Function End for DKMADRS ===== */
  21880. /* ===== Inline Function Start for DKMAXDS ===== */
  21881. /**
  21882. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21883. * \brief DKMAXDS (Saturating Signed Crossed Multiply Two Halfs & Subtract & Add)
  21884. * \details
  21885. * **Type**: DSP
  21886. *
  21887. * **Syntax**:\n
  21888. * ~~~
  21889. * DKMAXDS Rd, Rs1, Rs2
  21890. * ~~~
  21891. *
  21892. * **Purpose**:\n
  21893. * Do two cross 16x16 with 32-bit signed addition and subtraction simultaneously. The results are written into Rd.
  21894. *
  21895. * **Description**:\n
  21896. * Do two signed 16-bit crossed multiplications from 32-bit elements in two registers; and then
  21897. * perform a subtraction operation between the two 32-bit results. Then add the subtraction result to
  21898. * the corresponding 32-bit elements in a third register. The addition result may be saturated.
  21899. *
  21900. * **Operations**:\n
  21901. * ~~~
  21902. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21903. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21904. *
  21905. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21906. * mul1 = aop.H[1] s* bop.H[0];
  21907. * mul2 = aop.H[0] s* bop.H[1];
  21908. * res = sat.q31(dop + mul1 - mul2);
  21909. * }
  21910. * Rd = concat(rest, resb);
  21911. * x=0
  21912. * ~~~
  21913. *
  21914. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  21915. * \param [in] a unsigned long long source operand bound to Rs1
  21916. * \param [in] b unsigned long long source operand bound to Rs2
  21917. * \return value of Rd after the instruction (the updated t), unsigned long long type
  21918. */
  21919. __STATIC_FORCEINLINE unsigned long long __RV_DKMAXDS(unsigned long long t, unsigned long long a, unsigned long long b)
  21920. {
  21921. __ASM volatile("dkmaxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21922. return t;
  21923. }
  21924. /* ===== Inline Function End for DKMAXDS ===== */
  21925. /* ===== Inline Function Start for DKMSDA ===== */
  21926. /**
  21927. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21928. * \brief DKMSDA (Two 16x16 with 32-bit Signed Double Sub)
  21929. * \details
  21930. * **Type**: DSP
  21931. *
  21932. * **Syntax**:\n
  21933. * ~~~
  21934. * DKMSDA Rd, Rs1, Rs2
  21935. * ~~~
  21936. *
  21937. * **Purpose**:\n
  21938. * Do two 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
  21939. *
  21940. * **Description**:\n
  21941. * It multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  21942. * with the bottom 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of
  21943. * the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. Both products are subtracted from Rd.
  21944. *
  21945. * **Operations**:\n
  21946. * ~~~
  21947. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21948. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21949. *
  21950. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21951. * mul1 = aop.H[1] s* bop.H[1];
  21952. * mul2 = aop.H[0] s* bop.H[0];
  21953. * res = sat.q31(dop - mul1 - mul2);
  21954. * }
  21955. * Rd = concat(rest, resb);
  21956. * x=0
  21957. * ~~~
  21958. *
  21959. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  21960. * \param [in] a unsigned long long source operand bound to Rs1
  21961. * \param [in] b unsigned long long source operand bound to Rs2
  21962. * \return value of Rd after the instruction (the updated t), unsigned long long type
  21963. */
  21964. __STATIC_FORCEINLINE unsigned long long __RV_DKMSDA(unsigned long long t, unsigned long long a, unsigned long long b)
  21965. {
  21966. __ASM volatile("dkmsda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  21967. return t;
  21968. }
  21969. /* ===== Inline Function End for DKMSDA ===== */
  21970. /* ===== Inline Function Start for DKMSXDA ===== */
  21971. /**
  21972. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  21973. * \brief DKMSXDA (Two Cross 16x16 with 32-bit Signed Double Sub)
  21974. * \details
  21975. * **Type**: DSP
  21976. *
  21977. * **Syntax**:\n
  21978. * ~~~
  21979. * DKMSXDA Rd, Rs1, Rs2
  21980. * ~~~
  21981. *
  21982. * **Purpose**:\n
  21983. * Do two cross 16x16 with 32-bit signed double subtraction simultaneously. The results are written into Rd.
  21984. *
  21985. * **Description**:\n
  21986. * It multiplies the bottom 16-bit content of the 32-bit elements of Rs1
  21987. * with the top 16-bit content of the 32-bit elements of Rs2 and multiplies the top 16-bit content of the
  21988. * 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. Both products are subtracted from Rd.
  21989. *
  21990. * **Operations**:\n
  21991. * ~~~
  21992. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  21993. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  21994. *
  21995. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  21996. * mul1 = aop.H[1] s* bop.H[0];
  21997. * mul2 = aop.H[0] s* bop.H[1];
  21998. * res = sat.q31(dop - mul1 - mul2);
  21999. * }
  22000. * Rd = concat(rest, resb);
  22001. * x=0
  22002. * ~~~
  22003. *
  22004. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  22005. * \param [in] a unsigned long long source operand bound to Rs1
  22006. * \param [in] b unsigned long long source operand bound to Rs2
  22007. * \return value of Rd after the instruction (the updated t), unsigned long long type
  22008. */
  22009. __STATIC_FORCEINLINE unsigned long long __RV_DKMSXDA(unsigned long long t, unsigned long long a, unsigned long long b)
  22010. {
  22011. __ASM volatile("dkmsxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22012. return t;
  22013. }
  22014. /* ===== Inline Function End for DKMSXDA ===== */
  22015. /* ===== Inline Function Start for DSMAQA ===== */
  22016. /**
  22017. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22018. * \brief DSMAQA (Four Signed 8x8 with 32-bit Signed Add)
  22019. * \details
  22020. * **Type**: DSP
  22021. *
  22022. * **Syntax**:\n
  22023. * ~~~
  22024. * DSMAQA Rd, Rs1, Rs2
  22025. * ~~~
  22026. *
  22027. * **Purpose**:\n
  22028. * Do four signed 8x8 with 32-bit signed addition simultaneously. The results are written into Rd.
  22029. *
  22030. * **Description**:\n
  22031. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  22032. * signed 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
  22033. * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  22034. * corresponding 32-bit chunks in Rd.
  22035. *
  22036. * **Operations**:\n
  22037. * ~~~
  22038. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  22039. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  22040. *
  22041. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  22042. * m0 = aop.B[0] s* bop.B[0];
  22043. * m1 = aop.B[1] s* bop.B[1];
  22044. * m2 = aop.B[2] s* bop.B[2];
  22045. * m3 = aop.B[3] s* bop.B[3];
  22046. * res = dop + m0 + m1 + m2 + m3;
  22047. * }
  22048. * Rd = concat(rest, resb);
  22049. * x=0
  22050. * ~~~
  22051. *
  22052. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  22053. * \param [in] a unsigned long long source operand bound to Rs1
  22054. * \param [in] b unsigned long long source operand bound to Rs2
  22055. * \return value of Rd after the instruction (the updated t), unsigned long long type
  22056. */
  22057. __STATIC_FORCEINLINE unsigned long long __RV_DSMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
  22058. {
  22059. __ASM volatile("dsmaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22060. return t;
  22061. }
  22062. /* ===== Inline Function End for DSMAQA ===== */
  22063. /* ===== Inline Function Start for DSMAQA.SU ===== */
  22064. /**
  22065. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22066. * \brief DSMAQA.SU (Four Signed 8 x Unsigned 8 with 32-bit Signed Add)
  22067. * \details
  22068. * **Type**: DSP
  22069. *
  22070. * **Syntax**:\n
  22071. * ~~~
  22072. * DSMAQA.SU Rd, Rs1, Rs2
  22073. * ~~~
  22074. *
  22075. * **Purpose**:\n
  22076. * Do four signed 8 x unsigned 8 with 32-bit signed addition simultaneously. The results are written into Rd.
  22077. *
  22078. * **Description**:\n
  22079. * This instruction multiplies the four signed 8-bit elements of 32-bit chunks of Rs1 with the four
  22080. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the signed
  22081. * content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  22082. * corresponding 32-bit chunks in Rd.
  22083. *
  22084. * **Operations**:\n
  22085. * ~~~
  22086. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  22087. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  22088. *
  22089. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  22090. * m0 = aop.B[0] su* bop.B[0];
  22091. * m1 = aop.B[1] su* bop.B[1];
  22092. * m2 = aop.B[2] su* bop.B[2];
  22093. * m3 = aop.B[3] su* bop.B[3];
  22094. * res = dop + m0 + m1 + m2 + m3;
  22095. * }
  22096. * Rd = concat(rest, resb);
  22097. * x=0
  22098. * ~~~
  22099. *
  22100. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  22101. * \param [in] a unsigned long long source operand bound to Rs1 (the signed factor of each su* product)
  22102. * \param [in] b unsigned long long source operand bound to Rs2 (the unsigned factor of each su* product)
  22103. * \return value of Rd after the instruction (the updated t), unsigned long long type
  22104. */
  22105. __STATIC_FORCEINLINE unsigned long long __RV_DSMAQA_SU(unsigned long long t, unsigned long long a, unsigned long long b)
  22106. {
  22107. __ASM volatile("dsmaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22108. return t;
  22109. }
  22110. /* ===== Inline Function End for DSMAQA.SU ===== */
  22111. /* ===== Inline Function Start for DUMAQA ===== */
  22112. /**
  22113. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22114. * \brief DUMAQA (Four Unsigned 8x8 with 32-bit Unsigned Add)
  22115. * \details
  22116. * **Type**: DSP
  22117. *
  22118. * **Syntax**:\n
  22119. * ~~~
  22120. * DUMAQA Rd, Rs1, Rs2
  22121. * ~~~
  22122. *
  22123. * **Purpose**:\n
  22124. * Do four unsigned 8x8 with 32-bit unsigned addition simultaneously. The results are written into Rd.
  22125. *
  22126. * **Description**:\n
  22127. * This instruction multiplies the four unsigned 8-bit elements of 32-bit chunks of Rs1 with the four
  22128. * unsigned 8-bit elements of 32-bit chunks of Rs2 and then adds the four results together with the
  22129. * unsigned content of the corresponding 32-bit chunks of Rd. The final results are written back to the
  22130. * corresponding 32-bit chunks in Rd.
  22131. *
  22132. * **Operations**:\n
  22133. * ~~~
  22134. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; op3t = Rd.W[x+1] // top
  22135. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; op3b = Rd.W[x] // bottom
  22136. *
  22137. * for ((aop,bop,dop,res) in [(op1t,op2t,op3t,rest), (op1b,op2b,op3b,resb)]) {
  22138. * m0 = aop.B[0] u* bop.B[0];
  22139. * m1 = aop.B[1] u* bop.B[1];
  22140. * m2 = aop.B[2] u* bop.B[2];
  22141. * m3 = aop.B[3] u* bop.B[3];
  22142. * res = dop + m0 + m1 + m2 + m3;
  22143. * }
  22144. * Rd = concat(rest, resb);
  22145. * x=0
  22146. * ~~~
  22147. *
  22148. * \param [in] t unsigned long long accumulator; bound to Rd as a read-write ("+r") operand
  22149. * \param [in] a unsigned long long source operand bound to Rs1
  22150. * \param [in] b unsigned long long source operand bound to Rs2
  22151. * \return value of Rd after the instruction (the updated t), unsigned long long type
  22152. */
  22153. __STATIC_FORCEINLINE unsigned long long __RV_DUMAQA(unsigned long long t, unsigned long long a, unsigned long long b)
  22154. {
  22155. __ASM volatile("dumaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22156. return t;
  22157. }
  22158. /* ===== Inline Function End for DUMAQA ===== */
  22159. /* ===== Inline Function Start for DKMDA32 ===== */
  22160. /**
  22161. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22162. * \brief DKMDA32 (Two Signed 32x32 with 64-bit Saturation Add)
  22163. * \details
  22164. * **Type**: DSP
  22165. *
  22166. * **Syntax**:\n
  22167. * ~~~
  22168. * DKMDA32 Rd, Rs1, Rs2
  22169. * ~~~
  22170. *
  22171. * **Purpose**:\n
  22172. * Do two signed 32x32 multiplications and add the signed multiplication results with Q63 saturation. The results are written into Rd.
  22173. *
  22174. * **Description**:\n
  22175. * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22176. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  22177. * with the top 32-bit element of Rs2.
  22178. *
  22179. * **Operations**:\n
  22180. * ~~~
  22181. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22182. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22183. * t0 = op1b s* op2b;
  22184. * t1 = op1t s* op2t;
  22185. * Rd = sat.q63(t0 + t1);
  22186. * x=0
  22187. * ~~~
  22188. *
  22189. * \param [in] a unsigned long long source operand bound to Rs1
  22190. * \param [in] b unsigned long long source operand bound to Rs2
  22191. * \return value written to Rd by the instruction, long long type
  22192. */
  22193. __STATIC_FORCEINLINE long long __RV_DKMDA32(unsigned long long a, unsigned long long b)
  22194. {
  22195. long long result;
  22196. __ASM volatile("dkmda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  22197. return result;
  22198. }
  22199. /* ===== Inline Function End for DKMDA32 ===== */
  22200. /* ===== Inline Function Start for DKMXDA32 ===== */
  22201. /**
  22202. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22203. * \brief DKMXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
  22204. * \details
  22205. * **Type**: DSP
  22206. *
  22207. * **Syntax**:\n
  22208. * ~~~
  22209. * DKMXDA32 Rd, Rs1, Rs2
  22210. * ~~~
  22211. *
  22212. * **Purpose**:\n
  22213. * Do two cross signed 32x32 multiplications and add the signed multiplication results with Q63 saturation. The results are written into Rd.
  22214. *
  22215. * **Description**:\n
  22216. * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  22217. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  22218. * with the bottom 32-bit element of Rs2.
  22219. *
  22220. * **Operations**:\n
  22221. * ~~~
  22222. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22223. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22224. * t01 = op1b s* op2t;
  22225. * t10 = op1t s* op2b;
  22226. * Rd = sat.q63(t01 + t10);
  22227. * x=0
  22228. * ~~~
  22229. *
  22230. * \param [in] a unsigned long long source operand bound to Rs1
  22231. * \param [in] b unsigned long long source operand bound to Rs2
  22232. * \return value written to Rd by the instruction, long long type
  22233. */
  22234. __STATIC_FORCEINLINE long long __RV_DKMXDA32(unsigned long long a, unsigned long long b)
  22235. {
  22236. long long result;
  22237. __ASM volatile("dkmxda32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  22238. return result;
  22239. }
  22240. /* ===== Inline Function End for DKMXDA32 ===== */
  22241. /* ===== Inline Function Start for DKMADA32 ===== */
  22242. /**
  22243. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22244. * \brief DKMADA32 (Two Signed 32x32 with 64-bit Saturation Add)
  22245. * \details
  22246. * **Type**: DSP
  22247. *
  22248. * **Syntax**:\n
  22249. * ~~~
  22250. * DKMADA32 Rd, Rs1, Rs2
  22251. * ~~~
  22252. *
  22253. * **Purpose**:\n
  22254. * Do two signed 32x32 multiplications and add the signed multiplication results and a third register with Q63 saturation. The results are written into Rd.
  22255. *
  22256. * **Description**:\n
  22257. * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22258. * element of Rs2 and then adds the result to the result of multiplying the top 32-bit element of Rs1
  22259. * with the top 32-bit element of Rs2. The sum is accumulated into Rd.
  22260. *
  22261. * **Operations**:\n
  22262. * ~~~
  22263. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22264. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22265. * t0 = op1b s* op2b;
  22266. * t1 = op1t s* op2t;
  22267. * Rd = sat.q63(Rd + t0 + t1);
  22268. * x=0
  22269. * ~~~
  22270. *
  22271. * \param [in] t long long accumulator; bound to Rd as a read-write ("+r") operand
  22272. * \param [in] a unsigned long long source operand bound to Rs1
  22273. * \param [in] b unsigned long long source operand bound to Rs2
  22274. * \return value of Rd after the instruction (the updated t), long long type
  22275. */
  22276. __STATIC_FORCEINLINE long long __RV_DKMADA32(long long t, unsigned long long a, unsigned long long b)
  22277. {
  22278. __ASM volatile("dkmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22279. return t;
  22280. }
  22281. /* ===== Inline Function End for DKMADA32 ===== */
  22282. /* ===== Inline Function Start for DKMAXDA32 ===== */
  22283. /**
  22284. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22285. * \brief DKMAXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Add)
  22286. * \details
  22287. * **Type**: DSP
  22288. *
  22289. * **Syntax**:\n
  22290. * ~~~
  22291. * DKMAXDA32 Rd, Rs1, Rs2
  22292. * ~~~
  22293. *
  22294. * **Purpose**:\n
  22295. * Do two cross signed 32x32 multiplications and add the signed multiplication results and a third register with Q63
  22296. * saturation. The results are written into Rd.
  22297. *
  22298. * **Description**:\n
  22299. * It multiplies the top 32-bit element in Rs1 with the bottom 32-bit
  22300. * element in Rs2 and then adds the result to the result of multiplying the bottom 32-bit element in Rs1
  22301. * with the top 32-bit element in Rs2. The sum is accumulated into Rd.
  22302. *
  22303. * **Operations**:\n
  22304. * ~~~
  22305. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22306. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22307. * t01 = op1b s* op2t;
  22308. * t10 = op1t s* op2b;
  22309. * Rd = sat.q63(Rd + t01 + t10);
  22310. * x=0
  22311. * ~~~
  22312. *
  22313. * \param [in] t long long accumulator; bound to Rd as a read-write ("+r") operand
  22314. * \param [in] a unsigned long long source operand bound to Rs1
  22315. * \param [in] b unsigned long long source operand bound to Rs2
  22316. * \return value of Rd after the instruction (the updated t), long long type
  22317. */
  22318. __STATIC_FORCEINLINE long long __RV_DKMAXDA32(long long t, unsigned long long a, unsigned long long b)
  22319. {
  22320. __ASM volatile("dkmaxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22321. return t;
  22322. }
  22323. /* ===== Inline Function End for DKMAXDA32 ===== */
  22324. /* ===== Inline Function Start for DKMADS32 ===== */
  22325. /**
  22326. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22327. * \brief DKMADS32 (Two Signed 32x32 with 64-bit Saturation Add and Sub)
  22328. * \details
  22329. * **Type**: DSP
  22330. *
  22331. * **Syntax**:\n
  22332. * ~~~
  22333. * DKMADS32 Rd, Rs1, Rs2
  22334. * ~~~
  22335. *
  22336. * **Purpose**:\n
  22337. * Do two signed 32x32 multiplications, add the top signed multiplication result, subtract the bottom signed multiplication
  22338. * result and add a third register with Q63 saturation. The results are written into Rd.
  22339. *
  22340. * **Description**:\n
  22341. * It multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit
  22342. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in Rs1
  22343. * with the top 32-bit element in Rs2. The difference is accumulated into Rd.
  22344. *
  22345. * **Operations**:\n
  22346. * ~~~
  22347. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22348. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22349. *
  22350. * t0 = op1b s* op2b;
  22351. * t1 = op1t s* op2t;
  22352. * Rd = sat.q63(Rd - t0 + t1);
  22353. * x=0
  22354. * ~~~
  22355. *
  22356. * \param [in] t long long accumulator; bound to Rd as a read-write ("+r") operand
  22357. * \param [in] a unsigned long long source operand bound to Rs1
  22358. * \param [in] b unsigned long long source operand bound to Rs2
  22359. * \return value of Rd after the instruction (the updated t), long long type
  22360. */
  22361. __STATIC_FORCEINLINE long long __RV_DKMADS32(long long t, unsigned long long a, unsigned long long b)
  22362. {
  22363. __ASM volatile("dkmads32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22364. return t;
  22365. }
  22366. /* ===== Inline Function End for DKMADS32 ===== */
  22367. /* ===== Inline Function Start for DKMADRS32 ===== */
  22368. /**
  22369. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22370. * \brief DKMADRS32 (Two Signed 32x32 with 64-bit Saturation Reversed Add and Sub)
  22371. * \details
  22372. * **Type**: DSP
  22373. *
  22374. * **Syntax**:\n
  22375. * ~~~
  22376. * DKMADRS32 Rd, Rs1, Rs2
  22377. * ~~~
  22378. *
  22379. * **Purpose**:\n
  22380. * Do two signed 32x32 multiplications, subtract the top signed multiplication result, add the bottom signed
  22381. * multiplication result and add a third register with Q63 saturation.
  22382. * The results are written into Rd.
  22383. *
  22384. * **Description**:\n
  22385. * It multiplies the top 32-bit element in Rs1 with the top 32-bit
  22386. * element in Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  22387. * element in Rs1 with the bottom 32-bit element in Rs2. The difference is accumulated into Rd.
  22388. *
  22389. * **Operations**:\n
  22390. * ~~~
  22391. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22392. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22393. * t0 = op1b s* op2b;
  22394. * t1 = op1t s* op2t;
  22395. * Rd = sat.q63(Rd + t0 - t1);
  22396. * x=0
  22397. * ~~~
  22398. *
  22399. * \param [in] t long long accumulator; bound to Rd as a read-write ("+r") operand
  22400. * \param [in] a unsigned long long source operand bound to Rs1
  22401. * \param [in] b unsigned long long source operand bound to Rs2
  22402. * \return value of Rd after the instruction (the updated t), long long type
  22403. */
  22404. __STATIC_FORCEINLINE long long __RV_DKMADRS32(long long t, unsigned long long a, unsigned long long b)
  22405. {
  22406. __ASM volatile("dkmadrs32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22407. return t;
  22408. }
  22409. /* ===== Inline Function End for DKMADRS32 ===== */
  22410. /* ===== Inline Function Start for DKMAXDS32 ===== */
  22411. /**
  22412. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22413. * \brief DKMAXDS32 (Two Cross Signed 32x32 with 64-bit Saturation Add and Sub)
  22414. * \details
  22415. * **Type**: DSP
  22416. *
  22417. * **Syntax**:\n
  22418. * ~~~
  22419. * DKMAXDS32 Rd, Rs1, Rs2
  22420. * ~~~
  22421. *
  22422. * **Purpose**:\n
  22423. * Do two cross signed 32x32 multiplications, add the (top Rs1 x bottom Rs2) result, subtract the (bottom Rs1 x top Rs2)
  22424. * result and add a third register with Q63 saturation. The results are written into Rd.
  22425. *
  22426. * **Description**:\n
  22427. * It multiplies the bottom 32-bit element in Rs1 with the top 32-bit
  22428. * element in Rs2 and then subtracts the result from the result of multiplying the top 32-bit element in
  22429. * Rs1 with the bottom 32-bit element in Rs2. The difference is accumulated into Rd.
  22430. *
  22431. * **Operations**:\n
  22432. * ~~~
  22433. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22434. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22435. *
  22436. * t01 = op1b s* op2t;
  22437. * t10 = op1t s* op2b;
  22438. * Rd = sat.q63(Rd - t01 + t10);
  22439. * x=0
  22440. * ~~~
  22441. *
  22442. * \param [in] t long long accumulator; bound to Rd as a read-write ("+r") operand
  22443. * \param [in] a unsigned long long source operand bound to Rs1
  22444. * \param [in] b unsigned long long source operand bound to Rs2
  22445. * \return value of Rd after the instruction (the updated t), long long type
  22446. */
  22447. __STATIC_FORCEINLINE long long __RV_DKMAXDS32(long long t, unsigned long long a, unsigned long long b)
  22448. {
  22449. __ASM volatile("dkmaxds32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  22450. return t;
  22451. }
  22452. /* ===== Inline Function End for DKMAXDS32 ===== */
  22453. /* ===== Inline Function Start for DKMSDA32 ===== */
  22454. /**
  22455. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22456. * \brief DKMSDA32 (Two Signed 32x32 with 64-bit Saturation Sub)
  22457. * \details
  22458. * **Type**: DSP
  22459. *
  22460. * **Syntax**:\n
  22461. * ~~~
  22462. * DKMSDA32 Rd, Rs1, Rs2
  22463. * ~~~
  22464. *
  22465. * **Purpose**:\n
  22466. * Do two signed 32x32 and subtraction the top signed multiplication results and subtraction bottom signed multiplication
  22467. * results and add a third register with Q63 saturation. The results are written into Rd.
  22468. *
  22469. * **Description**:\n
  22470. * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22471. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2.
  22472. *
  22473. * **Operations**:\n
  22474. * ~~~
  22475. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22476. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22477. *
  22478. * t0 = op1b s* op2b;
  22479. * t1 = op1t s* op2t;
  22480. * Rd = sat.q63(Rd - t0 - t1);
  22481. * x=0
  22482. * ~~~
  22483. *
  22484. * \param [in] t long long type of value stored in t
  22485. * \param [in] a unsigned long long type of value stored in a
  22486. * \param [in] b unsigned long long type of value stored in b
  22487. * \return value stored in long long type
  22488. */
  22489. __STATIC_FORCEINLINE long long __RV_DKMSDA32(long long t, unsigned long long a, unsigned long long b)
  22490. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22491. __ASM volatile("dkmsda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22492. return t; /* Value left in t by the instruction. */
  22493. }
  22494. /* ===== Inline Function End for DKMSDA32 ===== */
  22495. /* ===== Inline Function Start for DKMSXDA32 ===== */
  22496. /**
  22497. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22498. * \brief DKMSXDA32 (Two Cross Signed 32x32 with 64-bit Saturation Sub)
  22499. * \details
  22500. * **Type**: DSP
  22501. *
  22502. * **Syntax**:\n
  22503. * ~~~
  22504. * DKMSXDA32 Rd, Rs1, Rs2
  22505. * ~~~
  22506. *
  22507. * **Purpose**:\n
  22508. * Do two cross signed 32x32 multiplications and subtract both signed multiplication
  22509. * results from a third register (the accumulator Rd) with Q63 saturation. The results are written into Rd.
  22510. *
  22511. * **Description**:\n
  22512. * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  22513. * element of Rs2 and multiplies the top 32-bit element of Rs1 with the bottom 32-bit element of Rs2.
  22514. *
  22515. * **Operations**:\n
  22516. * ~~~
  22517. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22518. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22519. *
  22520. * t0 = op1b s* op2t;
  22521. * t1 = op1t s* op2b;
  22522. * Rd = sat.q63(Rd - t0 - t1);
  22523. * x=0
  22524. * ~~~
  22525. *
  22526. * \param [in] t long long type of value stored in t
  22527. * \param [in] a unsigned long long type of value stored in a
  22528. * \param [in] b unsigned long long type of value stored in b
  22529. * \return value stored in long long type
  22530. */
  22531. __STATIC_FORCEINLINE long long __RV_DKMSXDA32(long long t, unsigned long long a, unsigned long long b)
  22532. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22533. __ASM volatile("dkmsxda32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22534. return t; /* Value left in t by the instruction. */
  22535. }
  22536. /* ===== Inline Function End for DKMSXDA32 ===== */
  22537. /* ===== Inline Function Start for DSMDS32 ===== */
  22538. /**
  22539. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22540. * \brief DSMDS32 (Two Signed 32x32 with 64-bit Sub)
  22541. * \details
  22542. * **Type**: DSP
  22543. *
  22544. * **Syntax**:\n
  22545. * ~~~
  22546. * DSMDS32 Rd, Rs1, Rs2
  22547. * ~~~
  22548. *
  22549. * **Purpose**:\n
  22550. * Do two signed 32x32 multiplications and subtract the bottom signed multiplication result from the top one. The
  22551. * results are written into Rd.
  22552. *
  22553. * **Description**:\n
  22554. * It multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit
  22555. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  22556. * Rs1 with the top 32-bit element of Rs2.
  22557. *
  22558. * **Operations**:\n
  22559. * ~~~
  22560. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22561. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22562. *
  22563. * t0 = op1b s* op2b;
  22564. * t1 = op1t s* op2t;
  22565. * Rd = t1 - t0;
  22566. * x=0
  22567. * ~~~
  22568. *
  22569. * \param [in] a unsigned long long type of value stored in a
  22570. * \param [in] b unsigned long long type of value stored in b
  22571. * \return value stored in long long type
  22572. */
  22573. __STATIC_FORCEINLINE long long __RV_DSMDS32(unsigned long long a, unsigned long long b)
  22574. { /* Operand mapping: result -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22575. long long result; /* No initializer needed: "=r" below marks it as a write-only asm output. */
  22576. __ASM volatile("dsmds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* a and b are input-only register operands. */
  22577. return result; /* Value the instruction wrote to %0. */
  22578. }
  22579. /* ===== Inline Function End for DSMDS32 ===== */
  22580. /* ===== Inline Function Start for DSMDRS32 ===== */
  22581. /**
  22582. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22583. * \brief DSMDRS32 (Two Signed 32x32 with 64-bit Reversed Sub)
  22584. * \details
  22585. * **Type**: DSP
  22586. *
  22587. * **Syntax**:\n
  22588. * ~~~
  22589. * DSMDRS32 Rd, Rs1, Rs2
  22590. * ~~~
  22591. *
  22592. * **Purpose**:\n
  22593. * Do two signed 32x32 multiplications and subtract the top signed multiplication result from the bottom one. The results are written into Rd.
  22594. *
  22595. * **Description**:\n
  22596. * It multiplies the top 32-bit element of Rs1 with the top 32-bit
  22597. * element of Rs2 and then subtracts the result from the result of multiplying the bottom 32-bit
  22598. * element of Rs1 with the bottom 32-bit element of Rs2.
  22599. *
  22600. * **Operations**:\n
  22601. * ~~~
  22602. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22603. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22604. *
  22605. * t0 = op1b s* op2b;
  22606. * t1 = op1t s* op2t;
  22607. * Rd = t0 - t1;
  22608. * x=0
  22609. * ~~~
  22610. *
  22611. * \param [in] a unsigned long long type of value stored in a
  22612. * \param [in] b unsigned long long type of value stored in b
  22613. * \return value stored in long long type
  22614. */
  22615. __STATIC_FORCEINLINE long long __RV_DSMDRS32(unsigned long long a, unsigned long long b)
  22616. { /* Operand mapping: result -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22617. long long result; /* No initializer needed: "=r" below marks it as a write-only asm output. */
  22618. __ASM volatile("dsmdrs32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* a and b are input-only register operands. */
  22619. return result; /* Value the instruction wrote to %0. */
  22620. }
  22621. /* ===== Inline Function End for DSMDRS32 ===== */
  22622. /* ===== Inline Function Start for DSMXDS32 ===== */
  22623. /**
  22624. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22625. * \brief DSMXDS32 (Two Cross Signed 32x32 with 64-bit Sub)
  22626. * \details
  22627. * **Type**: DSP
  22628. *
  22629. * **Syntax**:\n
  22630. * ~~~
  22631. * DSMXDS32 Rd, Rs1, Rs2
  22632. * ~~~
  22633. *
  22634. * **Purpose**:\n
  22635. * Do two cross signed 32x32 multiplications and subtract the (Rs1 bottom x Rs2 top) result from the (Rs1 top x Rs2 bottom) result.
  22636. * The results are written into Rd.
  22637. *
  22638. * **Description**:\n
  22639. * It multiplies the bottom 32-bit element of Rs1 with the top 32-bit
  22640. * element of Rs2 and then subtracts the result from the result of multiplying the top 32-bit element of
  22641. * Rs1 with the bottom 32-bit element of Rs2.
  22642. *
  22643. * **Operations**:\n
  22644. * ~~~
  22645. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22646. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22647. *
  22648. * t01 = op1b s* op2t;
  22649. * t10 = op1t s* op2b;
  22650. * Rd = t10 - t01;
  22651. * x=0
  22652. * ~~~
  22653. *
  22654. * \param [in] a unsigned long long type of value stored in a
  22655. * \param [in] b unsigned long long type of value stored in b
  22656. * \return value stored in long long type
  22657. */
  22658. __STATIC_FORCEINLINE long long __RV_DSMXDS32(unsigned long long a, unsigned long long b)
  22659. { /* Operand mapping: result -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22660. long long result; /* No initializer needed: "=r" below marks it as a write-only asm output. */
  22661. __ASM volatile("dsmxds32 %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* a and b are input-only register operands. */
  22662. return result; /* Value the instruction wrote to %0. */
  22663. }
  22664. /* ===== Inline Function End for DSMXDS32 ===== */
  22665. /* ===== Inline Function Start for DSMALDA ===== */
  22666. /**
  22667. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22668. * \brief DSMALDA (Four Signed 16x16 with 64-bit Add)
  22669. * \details
  22670. * **Type**: DSP
  22671. *
  22672. * **Syntax**:\n
  22673. * ~~~
  22674. * DSMALDA Rd, Rs1, Rs2
  22675. * ~~~
  22676. *
  22677. * **Purpose**:\n
  22678. * Do four signed 16x16 and add signed multiplication results and a third register. The results are written into Rd.
  22679. *
  22680. * **Description**:\n
  22681. * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  22682. * content of Rs2 and then adds the result to the result of multiplying the top 16-bit content of Rs1 with
  22683. * the top 16-bit content of Rs2 with unlimited precision
  22684. *
  22685. * **Operations**:\n
  22686. * ~~~
  22687. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22688. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22689. *
  22690. * m0 = op1b.H[0] s* op2b.H[0];
  22691. * m1 = op1b.H[1] s* op2b.H[1];
  22692. * m2 = op1t.H[0] s* op2t.H[0];
  22693. * m3 = op1t.H[1] s* op2t.H[1];
  22694. *
  22695. * Rd = Rd + m0 + m1 + m2 + m3;
  22696. * x=0
  22697. * ~~~
  22698. *
  22699. * \param [in] t long long type of value stored in t
  22700. * \param [in] a unsigned long long type of value stored in a
  22701. * \param [in] b unsigned long long type of value stored in b
  22702. * \return value stored in long long type
  22703. */
  22704. __STATIC_FORCEINLINE long long __RV_DSMALDA(long long t, unsigned long long a, unsigned long long b)
  22705. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22706. __ASM volatile("dsmalda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22707. return t; /* Value left in t by the instruction. */
  22708. }
  22709. /* ===== Inline Function End for DSMALDA ===== */
  22710. /* ===== Inline Function Start for DSMALXDA ===== */
  22711. /**
  22712. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22713. * \brief DSMALXDA (Four Cross Signed 16x16 with 64-bit Add)
  22714. * \details
  22715. * **Type**: DSP
  22716. *
  22717. * **Syntax**:\n
  22718. * ~~~
  22719. * DSMALXDA Rd, Rs1, Rs2
  22720. * ~~~
  22721. *
  22722. * **Purpose**:\n
  22723. * Do four cross signed 16x16 and add signed multiplication results and a third register. The results are written into Rd.
  22724. *
  22725. * **Description**:\n
  22726. * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  22727. * content of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of Rs1
  22728. * with the top 16-bit content of Rs2 with unlimited precision.
  22729. *
  22730. * **Operations**:\n
  22731. * ~~~
  22732. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22733. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22734. *
  22735. * m0 = op1b.H[0] s* op2b.H[1];
  22736. * m1 = op1b.H[1] s* op2b.H[0];
  22737. * m2 = op1t.H[0] s* op2t.H[1];
  22738. * m3 = op1t.H[1] s* op2t.H[0];
  22739. *
  22740. * Rd = Rd + m0 + m1 + m2 + m3;
  22741. * x=0
  22742. * ~~~
  22743. *
  22744. * \param [in] t long long type of value stored in t
  22745. * \param [in] a unsigned long long type of value stored in a
  22746. * \param [in] b unsigned long long type of value stored in b
  22747. * \return value stored in long long type
  22748. */
  22749. __STATIC_FORCEINLINE long long __RV_DSMALXDA(long long t, unsigned long long a, unsigned long long b)
  22750. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22751. __ASM volatile("dsmalxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22752. return t; /* Value left in t by the instruction. */
  22753. }
  22754. /* ===== Inline Function End for DSMALXDA ===== */
  22755. /* ===== Inline Function Start for DSMALDS ===== */
  22756. /**
  22757. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22758. * \brief DSMALDS (Four Signed 16x16 with 64-bit Add and Sub)
  22759. * \details
  22760. * **Type**: DSP
  22761. *
  22762. * **Syntax**:\n
  22763. * ~~~
  22764. * DSMALDS Rd, Rs1, Rs2
  22765. * ~~~
  22766. *
  22767. * **Purpose**:\n
  22768. * Do four signed 16x16 multiplications, then add and subtract the signed multiplication results to and from a third register. The results are written into Rd.
  22769. *
  22770. * **Description**:\n
  22771. * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  22772. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  22773. * Rs1 with the top 16-bit content of Rs2.
  22774. *
  22775. * **Operations**:\n
  22776. * ~~~
  22777. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22778. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22779. *
  22780. * m0 = op1b.H[1] s* op2b.H[1];
  22781. * m1 = op1b.H[0] s* op2b.H[0];
  22782. * m2 = op1t.H[1] s* op2t.H[1];
  22783. * m3 = op1t.H[0] s* op2t.H[0];
  22784. *
  22785. * Rd = Rd + m0 - m1 + m2 - m3;
  22786. * x=0
  22787. * ~~~
  22788. *
  22789. * \param [in] t long long type of value stored in t
  22790. * \param [in] a unsigned long long type of value stored in a
  22791. * \param [in] b unsigned long long type of value stored in b
  22792. * \return value stored in long long type
  22793. */
  22794. __STATIC_FORCEINLINE long long __RV_DSMALDS(long long t, unsigned long long a, unsigned long long b)
  22795. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22796. __ASM volatile("dsmalds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22797. return t; /* Value left in t by the instruction. */
  22798. }
  22799. /* ===== Inline Function End for DSMALDS ===== */
  22800. /* ===== Inline Function Start for DSMALDRS ===== */
  22801. /**
  22802. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22803. * \brief DSMALDRS (Four Signed 16x16 with 64-bit Add and Reversed Sub)
  22804. * \details
  22805. * **Type**: DSP
  22806. *
  22807. * **Syntax**:\n
  22808. * ~~~
  22809. * DSMALDRS Rd, Rs1, Rs2
  22810. * ~~~
  22811. *
  22812. * **Purpose**:\n
  22813. * Do four signed 16x16 multiplications, then add and subtract (in reversed order) the signed multiplication results to and from a third register. The results are written into Rd.
  22814. *
  22815. * **Description**:\n
  22816. * It multiplies the top 16-bit content of Rs1 with the top 16-bit content
  22817. * of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of Rs1
  22818. * with the bottom 16-bit content of Rs2.
  22819. *
  22820. * **Operations**:\n
  22821. * ~~~
  22822. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22823. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22824. *
  22825. * m0 = op1b.H[0] s* op2b.H[0];
  22826. * m1 = op1b.H[1] s* op2b.H[1];
  22827. * m2 = op1t.H[0] s* op2t.H[0];
  22828. * m3 = op1t.H[1] s* op2t.H[1];
  22829. *
  22830. * Rd = Rd + m0 - m1 + m2 - m3;
  22831. * x=0
  22832. * ~~~
  22833. *
  22834. * \param [in] t long long type of value stored in t
  22835. * \param [in] a unsigned long long type of value stored in a
  22836. * \param [in] b unsigned long long type of value stored in b
  22837. * \return value stored in long long type
  22838. */
  22839. __STATIC_FORCEINLINE long long __RV_DSMALDRS(long long t, unsigned long long a, unsigned long long b)
  22840. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22841. __ASM volatile("dsmaldrs %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22842. return t; /* Value left in t by the instruction. */
  22843. }
  22844. /* ===== Inline Function End for DSMALDRS ===== */
  22845. /* ===== Inline Function Start for DSMALXDS ===== */
  22846. /**
  22847. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22848. * \brief DSMALXDS (Four Cross Signed 16x16 with 64-bit Add and Sub)
  22849. * \details
  22850. * **Type**: DSP
  22851. *
  22852. * **Syntax**:\n
  22853. * ~~~
  22854. * DSMALXDS Rd, Rs1, Rs2
  22855. * ~~~
  22856. *
  22857. * **Purpose**:\n
  22858. * Do four cross signed 16x16 multiplications, then add and subtract the signed multiplication results to and from a third register. The results are written into Rd.
  22859. *
  22860. * **Description**:\n
  22861. * It multiplies the bottom 16-bit content of Rs1 with the top 16-bit
  22862. * content of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of
  22863. * Rs1 with the bottom 16-bit content of Rs2.
  22864. *
  22865. * **Operations**:\n
  22866. * ~~~
  22867. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22868. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22869. *
  22870. * m0 = op1b.H[1] s* op2b.H[0];
  22871. * m1 = op1b.H[0] s* op2b.H[1];
  22872. * m2 = op1t.H[1] s* op2t.H[0];
  22873. * m3 = op1t.H[0] s* op2t.H[1];
  22874. *
  22875. * Rd = Rd + m0 - m1 + m2 - m3;
  22876. * x=0
  22877. * ~~~
  22878. *
  22879. * \param [in] t long long type of value stored in t
  22880. * \param [in] a unsigned long long type of value stored in a
  22881. * \param [in] b unsigned long long type of value stored in b
  22882. * \return value stored in long long type
  22883. */
  22884. __STATIC_FORCEINLINE long long __RV_DSMALXDS(long long t, unsigned long long a, unsigned long long b)
  22885. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22886. __ASM volatile("dsmalxds %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22887. return t; /* Value left in t by the instruction. */
  22888. }
  22889. /* ===== Inline Function End for DSMALXDS ===== */
  22890. /* ===== Inline Function Start for DSMSLDA ===== */
  22891. /**
  22892. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22893. * \brief DSMSLDA (Four Signed 16x16 with 64-bit Sub)
  22894. * \details
  22895. * **Type**: DSP
  22896. *
  22897. * **Syntax**:\n
  22898. * ~~~
  22899. * DSMSLDA Rd, Rs1, Rs2
  22900. * ~~~
  22901. *
  22902. * **Purpose**:\n
  22903. * Do four signed 16x16 multiplications and subtract the signed multiplication results from a third register. The results are written into Rd.
  22904. *
  22905. * **Description**:\n
  22906. * It multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit
  22907. * content of Rs2 and multiplies the top 16-bit content of Rs1 with the top 16-bit content of Rs2.
  22908. *
  22909. * **Operations**:\n
  22910. * ~~~
  22911. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22912. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22913. *
  22914. * m0 = op1b.H[0] s* op2b.H[0];
  22915. * m1 = op1b.H[1] s* op2b.H[1];
  22916. * m2 = op1t.H[0] s* op2t.H[0];
  22917. * m3 = op1t.H[1] s* op2t.H[1];
  22918. *
  22919. * Rd = Rd - m0 - m1 - m2 - m3;
  22920. * x=0
  22921. * ~~~
  22922. *
  22923. * \param [in] t long long type of value stored in t
  22924. * \param [in] a unsigned long long type of value stored in a
  22925. * \param [in] b unsigned long long type of value stored in b
  22926. * \return value stored in long long type
  22927. */
  22928. __STATIC_FORCEINLINE long long __RV_DSMSLDA(long long t, unsigned long long a, unsigned long long b)
  22929. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22930. __ASM volatile("dsmslda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22931. return t; /* Value left in t by the instruction. */
  22932. }
  22933. /* ===== Inline Function End for DSMSLDA ===== */
  22934. /* ===== Inline Function Start for DSMSLXDA ===== */
  22935. /**
  22936. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22937. * \brief DSMSLXDA (Four Cross Signed 16x16 with 64-bit Sub)
  22938. * \details
  22939. * **Type**: DSP
  22940. *
  22941. * **Syntax**:\n
  22942. * ~~~
  22943. * DSMSLXDA Rd, Rs1, Rs2
  22944. * ~~~
  22945. *
  22946. * **Purpose**:\n
  22947. * Do four cross signed 16x16 multiplications and subtract the signed multiplication results from a third register. The results are written into Rd.
  22948. *
  22949. * **Description**:\n
  22950. * It multiplies the top 16-bit content of Rs1 with the bottom 16-bit
  22951. * content of Rs2 and multiplies the bottom 16-bit content of Rs1 with the top 16-bit content of Rs2.
  22952. *
  22953. * **Operations**:\n
  22954. * ~~~
  22955. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  22956. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  22957. *
  22958. * m0 = op1b.H[0] s* op2b.H[1];
  22959. * m1 = op1b.H[1] s* op2b.H[0];
  22960. * m2 = op1t.H[0] s* op2t.H[1];
  22961. * m3 = op1t.H[1] s* op2t.H[0];
  22962. *
  22963. * Rd = Rd - m0 - m1 - m2 - m3;
  22964. * x=0
  22965. * ~~~
  22966. *
  22967. * \param [in] t long long type of value stored in t
  22968. * \param [in] a unsigned long long type of value stored in a
  22969. * \param [in] b unsigned long long type of value stored in b
  22970. * \return value stored in long long type
  22971. */
  22972. __STATIC_FORCEINLINE long long __RV_DSMSLXDA(long long t, unsigned long long a, unsigned long long b)
  22973. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  22974. __ASM volatile("dsmslxda %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  22975. return t; /* Value left in t by the instruction. */
  22976. }
  22977. /* ===== Inline Function End for DSMSLXDA ===== */
  22978. /* ===== Inline Function Start for DDSMAQA ===== */
  22979. /**
  22980. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  22981. * \brief DDSMAQA (Eight Signed 8x8 with 64-bit Add)
  22982. * \details
  22983. * **Type**: DSP
  22984. *
  22985. * **Syntax**:\n
  22986. * ~~~
  22987. * DDSMAQA Rd, Rs1, Rs2
  22988. * ~~~
  22989. *
  22990. * **Purpose**:\n
  22991. * Do eight signed 8x8 and add signed multiplication results and a third register. The results are written into Rd.
  22992. *
  22993. * **Description**:\n
  22994. * Do eight signed 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
  22995. * the eight 16-bit results and the content of 64-bit chunks of a third register.
  22996. *
  22997. * **Operations**:\n
  22998. * ~~~
  22999. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  23000. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  23001. *
  23002. * m0 = op1b.B[0] s* op2b.B[0];
  23003. * m1 = op1b.B[1] s* op2b.B[1];
  23004. * m2 = op1b.B[2] s* op2b.B[2];
  23005. * m3 = op1b.B[3] s* op2b.B[3];
  23006. * m4 = op1t.B[0] s* op2t.B[0];
  23007. * m5 = op1t.B[1] s* op2t.B[1];
  23008. * m6 = op1t.B[2] s* op2t.B[2];
  23009. * m7 = op1t.B[3] s* op2t.B[3];
  23010. *
  23011. * s0 = m0 + m1 + m2 + m3;
  23012. * s1 = m4 + m5 + m6 + m7;
  23013. * Rd = Rd + s0 + s1;
  23014. * x=0
  23015. * ~~~
  23016. *
  23017. * \param [in] t long long type of value stored in t
  23018. * \param [in] a unsigned long long type of value stored in a
  23019. * \param [in] b unsigned long long type of value stored in b
  23020. * \return value stored in long long type
  23021. */
  23022. __STATIC_FORCEINLINE long long __RV_DDSMAQA(long long t, unsigned long long a, unsigned long long b)
  23023. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  23024. __ASM volatile("ddsmaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  23025. return t; /* Value left in t by the instruction. */
  23026. }
  23027. /* ===== Inline Function End for DDSMAQA ===== */
  23028. /* ===== Inline Function Start for DDSMAQA.SU ===== */
  23029. /**
  23030. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23031. * \brief DDSMAQA.SU (Eight Signed 8 x Unsigned 8 with 64-bit Add)
  23032. * \details
  23033. * **Type**: DSP
  23034. *
  23035. * **Syntax**:\n
  23036. * ~~~
  23037. * DDSMAQA.SU Rd, Rs1, Rs2
  23038. * ~~~
  23039. *
  23040. * **Purpose**:\n
  23041. * Do eight signed 8 x unsigned 8 and add signed multiplication results and a third register. The results are written into Rd.
  23042. *
  23043. * **Description**:\n
  23044. * Do eight signed 8-bit x unsigned 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
  23045. * the eight 16-bit results and the content of 64-bit chunks of a third register.
  23046. *
  23047. * **Operations**:\n
  23048. * ~~~
  23049. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  23050. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  23051. *
  23052. * m0 = op1b.B[0] su* op2b.B[0];
  23053. * m1 = op1b.B[1] su* op2b.B[1];
  23054. * m2 = op1b.B[2] su* op2b.B[2];
  23055. * m3 = op1b.B[3] su* op2b.B[3];
  23056. * m4 = op1t.B[0] su* op2t.B[0];
  23057. * m5 = op1t.B[1] su* op2t.B[1];
  23058. * m6 = op1t.B[2] su* op2t.B[2];
  23059. * m7 = op1t.B[3] su* op2t.B[3];
  23060. *
  23061. * s0 = m0 + m1 + m2 + m3;
  23062. * s1 = m4 + m5 + m6 + m7;
  23063. * Rd = Rd + s0 + s1;
  23064. * x=0
  23065. * ~~~
  23066. *
  23067. * \param [in] t long long type of value stored in t
  23068. * \param [in] a unsigned long long type of value stored in a
  23069. * \param [in] b unsigned long long type of value stored in b
  23070. * \return value stored in long long type
  23071. */
  23072. __STATIC_FORCEINLINE long long __RV_DDSMAQA_SU(long long t, unsigned long long a, unsigned long long b)
  23073. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  23074. __ASM volatile("ddsmaqa.su %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  23075. return t; /* Value left in t by the instruction. */
  23076. }
  23077. /* ===== Inline Function End for DDSMAQA.SU ===== */
  23078. /* ===== Inline Function Start for DDUMAQA ===== */
  23079. /**
  23080. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23081. * \brief DDUMAQA (Eight Unsigned 8x8 with 64-bit Unsigned Add)
  23082. * \details
  23083. * **Type**: DSP
  23084. *
  23085. * **Syntax**:\n
  23086. * ~~~
  23087. * DDUMAQA Rd, Rs1, Rs2
  23088. * ~~~
  23089. *
  23090. * **Purpose**:\n
  23091. * Do eight unsigned 8x8 and add unsigned multiplication results and a third register. The results are written into Rd.
  23092. *
  23093. * **Description**:\n
  23094. * Do eight unsigned 8-bit multiplications from eight 8-bit chunks of two registers; and then adds
  23095. * the eight 16-bit results and the content of 64-bit chunks of a third register.
  23096. *
  23097. * **Operations**:\n
  23098. * ~~~
  23099. * op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
  23100. * op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
  23101. *
  23102. * m0 = op1b.B[0] u* op2b.B[0];
  23103. * m1 = op1b.B[1] u* op2b.B[1];
  23104. * m2 = op1b.B[2] u* op2b.B[2];
  23105. * m3 = op1b.B[3] u* op2b.B[3];
  23106. * m4 = op1t.B[0] u* op2t.B[0];
  23107. * m5 = op1t.B[1] u* op2t.B[1];
  23108. * m6 = op1t.B[2] u* op2t.B[2];
  23109. * m7 = op1t.B[3] u* op2t.B[3];
  23110. *
  23111. * s0 = m0 + m1 + m2 + m3;
  23112. * s1 = m4 + m5 + m6 + m7;
  23113. * Rd = Rd + s0 + s1;
  23114. * x=0
  23115. * ~~~
  23116. *
  23117. * \param [in] t long long type of value stored in t
  23118. * \param [in] a unsigned long long type of value stored in a
  23119. * \param [in] b unsigned long long type of value stored in b
  23120. * \return value stored in long long type
  23121. */
  23122. __STATIC_FORCEINLINE long long __RV_DDUMAQA(long long t, unsigned long long a, unsigned long long b)
  23123. { /* Operand mapping: t -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). NOTE(review): the brief says "Unsigned Add" but t and the return type are signed long long - confirm this is intended. */
  23124. __ASM volatile("ddumaqa %0, %1, %2" : "+r"(t) : "r"(a), "r"(b)); /* "+r"(t): t is a read-write register operand (accumulator). */
  23125. return t; /* Value left in t by the instruction. */
  23126. }
  23127. /* ===== Inline Function End for DDUMAQA ===== */
  23128. /* ===== Inline Function Start for DSMA32.u ===== */
  23129. /**
  23130. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23131. * \brief DSMA32.u (64-bit SIMD 32-bit Signed Multiply Addition With Rounding and Clip)
  23132. * \details
  23133. * **Type**: DSP
  23134. *
  23135. * **Syntax**:\n
  23136. * ~~~
  23137. * DSMA32.u Rd, Rs1, Rs2
  23138. * ~~~
  23139. *
  23140. * **Purpose**:\n
  23141. * Do two signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31.
  23142. * The result is written to Rd.
  23143. *
  23144. * **Description**:\n
  23145. * For the `DSMA32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31
  23146. * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
  23147. * the bottom 32-bit Q31 content of 64-bit chunks in Rs2.
  23148. * Then, do the addition for the results above and perform the additional rounding operations, and then move the data to the right
  23149. * by 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
  23150. *
  23151. * **Operations**:\n
  23152. * ~~~
  23153. * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] + Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23154. * x=0
  23155. * ~~~
  23156. *
  23157. * \param [in] a unsigned long long type of value stored in a
  23158. * \param [in] b unsigned long long type of value stored in b
  23159. * \return value stored in long type
  23160. */
  23161. __STATIC_FORCEINLINE long __RV_DSMA32_U(unsigned long long a, unsigned long long b)
  23162. { /* Operand mapping: result -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  23163. long result; /* No initializer needed: "=r" below marks it as a write-only asm output. */
  23164. __ASM volatile("dsma32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* a and b are input-only register operands. */
  23165. return result; /* Value the instruction wrote to %0. */
  23166. }
  23167. /* ===== Inline Function End for DSMA32.u ===== */
  23168. /* ===== Inline Function Start for DSMXS32.u ===== */
  23169. /**
  23170. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23171. * \brief DSMXS32.u (64-bit SIMD 32-bit Signed Multiply Cross Subtraction With Rounding and Clip)
  23172. * \details
  23173. * **Type**: DSP
  23174. *
  23175. * **Syntax**:\n
  23176. * ~~~
  23177. * DSMXS32.u Rd, Rs1, Rs2
  23178. * ~~~
  23179. *
  23180. * **Purpose**:\n
  23181. * Do two cross signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
  23182. * q31. The result is written to Rd.
  23183. *
  23184. * **Description**:\n
  23185. * For the `DSMXS32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
  23186. * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1
  23187. * with the top 32-bit Q31 content of 64-bit chunks in Rs2.
  23188. * Then, do the subtraction for the results above and perform the additional rounding operations, and then move the data to the right by
  23189. * 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
  23190. *
  23191. * **Operations**:\n
  23192. * ~~~
  23193. * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] - Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23194. * x=0
  23195. * ~~~
  23196. *
  23197. * \param [in] a unsigned long long type of value stored in a
  23198. * \param [in] b unsigned long long type of value stored in b
  23199. * \return value stored in long type
  23200. */
  23201. __STATIC_FORCEINLINE long __RV_DSMXS32_U(unsigned long long a, unsigned long long b)
  23202. { /* Operand mapping: result -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  23203. long result; /* No initializer needed: "=r" below marks it as a write-only asm output. */
  23204. __ASM volatile("dsmxs32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* a and b are input-only register operands. */
  23205. return result; /* Value the instruction wrote to %0. */
  23206. }
  23207. /* ===== Inline Function End for DSMXS32.u ===== */
  23208. /* ===== Inline Function Start for DSMXA32.u ===== */
  23209. /**
  23210. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23211. * \brief DSMXA32.u (64-bit SIMD 32-bit Signed Cross Multiply Addition with Rounding and Clip)
  23212. * \details
  23213. * **Type**: DSP
  23214. *
  23215. * **Syntax**:\n
  23216. * ~~~
  23217. * DSMXA32.u Rd, Rs1, Rs2
  23218. * ~~~
  23219. *
  23220. * **Purpose**:\n
  23221. * Do two cross signed 32x32 and add signed multiplication results with Rounding, then right shift 32-bit and clip q63 to
  23222. * q31. The result is written to Rd.
  23223. *
  23224. * **Description**:\n
  23225. * For the `DSMXA32.u` instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31
  23226. * content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with
  23227. * the top 32-bit Q31 content of 64-bit chunks in Rs2.
  23228. * Then, do the addition for the results above and perform the additional rounding operations, and then move the data to the right
  23229. * by 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
  23230. *
  23231. * **Operations**:\n
  23232. * ~~~
  23233. * Rd = (q31_t)((Rs1.W[x + 1] s* Rs2.W[x] + Rs1.W[x] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23234. * x=0
  23235. * ~~~
  23236. *
  23237. * \param [in] a unsigned long long type of value stored in a
  23238. * \param [in] b unsigned long long type of value stored in b
  23239. * \return value stored in long type
  23240. */
  23241. __STATIC_FORCEINLINE long __RV_DSMXA32_U(unsigned long long a, unsigned long long b)
  23242. { /* Operand mapping: result -> Rd (%0), a -> Rs1 (%1), b -> Rs2 (%2). */
  23243. long result; /* No initializer needed: "=r" below marks it as a write-only asm output. */
  23244. __ASM volatile("dsmxa32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); /* a and b are input-only register operands. */
  23245. return result; /* Value the instruction wrote to %0. */
  23246. }
  23247. /* ===== Inline Function End for DSMXA32.u ===== */
  23248. /* ===== Inline Function Start for DSMS32.u ===== */
  23249. /**
  23250. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23251. * \brief DSMS32.u (64-bit SIMD 32-bit Signed Multiply Subtraction with Rounding and Clip)
  23252. * \details
  23253. * **Type**: DSP
  23254. *
  23255. * **Syntax**:\n
  23256. * ~~~
  23257. * DSMS32.u Rd, Rs1, Rs2
  23258. * ~~~
  23259. *
  23260. * **Purpose**:\n
  23261. * Do two signed 32x32 and sub signed multiplication results with Rounding, then right shift 32-bit and clip q63 to q31. The
  23262. * result is written to Rd.
  23263. *
  23264. * **Description**:\n
  23265. * For the `DSMS32.u` instruction, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit
  23266. * Q31 content of 64-bit chunks in Rs2. At the same time, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with
  23267. * the top 32-bit Q31 content of 64-bit chunks in Rs2.
  23268. * Then, do the subtraction for the results above and perform the additional rounding operations, and then move the data to the right by
  23269. * 32-bit, and clip the 64-bit data into 32-bit. The result is written to Rd.
  23270. *
  23271. * **Operations**:\n
  23272. * ~~~
  23273. * Rd = (q31_t)((Rs1.W[x] s* Rs2.W[x] - Rs1.W[x + 1] s* Rs2.W[x + 1] + 0x80000000LL) s>> 32);
  23274. * x=0
  23275. * ~~~
  23276. *
  23277. * \param [in] a unsigned long long type of value stored in a
  23278. * \param [in] b unsigned long long type of value stored in b
  23279. * \return value stored in long type
  23280. */
  23281. __STATIC_FORCEINLINE long __RV_DSMS32_U(unsigned long long a, unsigned long long b)
  23282. {
  23283. long result;
  23284. __ASM volatile("dsms32.u %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  23285. return result;
  23286. }
  23287. /* ===== Inline Function End for DSMS32.u ===== */
  23288. /* ===== Inline Function Start for DSMADA16 ===== */
  23289. /**
  23290. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23291. * \brief DSMADA16 (Signed Multiply Two Halfs and Two Adds 32-bit)
  23292. * \details
  23293. * **Type**: SIMD
  23294. *
  23295. * **Syntax**:\n
  23296. * ~~~
  23297. * DSMADA16 Rd, Rs1, Rs2
  23298. * ~~~
  23299. *
  23300. * **Purpose**:\n
  23301. * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an
  23302. * even/odd pair of registers together.
  23303. * * DSMADA16: rt pair+ top*top + bottom*bottom
  23304. *
  23305. * **Description**:\n
  23306. * This instruction multiplies the per 16-bit content of the 32-bit elements of Rs1 with the corresponding 16-bit content of
  23307. * the 32-bit elements of Rs2. The result is added to the 32-bit value of an even/odd pair of registers specified by Rd(4,1).
  23308. * The 32-bit addition result is written back to the register-pair. The 16-bit values of Rs1 and Rs2, and the 32-bit value of the
  23309. * register-pair are treated as signed integers.
  23310. *
  23311. * **Operations**:\n
  23312. * ~~~
  23313. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[0]);
  23314. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[1]);
  23315. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[0]);
  23316. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[1]);
  23317. * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
  23318. * ~~~
  23319. *
  23320. * \param [in] t long long type of value stored in t
  23321. * \param [in] a unsigned long long type of value stored in a
  23322. * \param [in] b unsigned long long type of value stored in b
  23323. * \return value stored in long type
  23324. */
  23325. __STATIC_FORCEINLINE long __RV_DSMADA16(long long t, unsigned long long a, unsigned long long b)
  23326. {
  23327. __ASM volatile("dsmada16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23328. return (long)t;
  23329. }
  23330. /* ===== Inline Function End for DSMADA16 ===== */
  23331. /* ===== Inline Function Start for DSMAXDA16 ===== */
  23332. /**
  23333. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23334. * \brief DSMAXDA16 (Signed Crossed Multiply Two Halfs and Two Adds 32-bit)
  23335. * \details
  23336. * **Type**: SIMD
  23337. *
  23338. * **Syntax**:\n
  23339. * ~~~
  23340. * DSMAXDA16 Rd, Rs1, Rs2
  23341. * ~~~
  23342. *
  23343. * **Purpose**:\n
  23344. * Do two signed 16-bit multiplications of two 32-bit registers; and then adds the 32-bit results and the 32-bit value of an
  23345. * even/odd pair of registers together.
  23346. * * DSMAXDA16: rt pair+ top*bottom + bottom*top (all 32-bit elements)
  23347. *
  23348. * **Description**:\n
  23349. * This instruction crossly multiplies the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit
  23350. * elements of Rs2 and then adds the result to the result of multiplying the bottom 16-bit content of the 32-bit elements of
  23351. * Rs1 with the top 16-bit content of the 32-bit elements of Rs2 with unlimited precision. The result is added to the 64-bit
  23352. * value of an even/odd pair of registers specified by Rd(4,1). The 64-bit addition result is clipped to 32-bit result.
  23353. *
  23354. * **Operations**:\n
  23355. * ~~~
  23356. * Mres0[0][31:0] = (Rs1.W[0].H[0] * Rs2.W[0].H[1]);
  23357. * Mres1[0][31:0] = (Rs1.W[0].H[1] * Rs2.W[0].H[0]);
  23358. * Mres0[1][31:0] = (Rs1.W[1].H[0] * Rs2.W[1].H[1]);
  23359. * Mres1[1][31:0] = (Rs1.W[1].H[1] * Rs2.W[1].H[0]);
  23360. * Rd.W = Rd.W + SE32(Mres0[0][31:0]) + SE32(Mres1[0][31:0]) + SE32(Mres0[1][31:0]) + SE32(Mres1[1][31:0]);
  23361. * ~~~
  23362. *
  23363. * \param [in] t long long type of value stored in t
  23364. * \param [in] a unsigned long long type of value stored in a
  23365. * \param [in] b unsigned long long type of value stored in b
  23366. * \return value stored in long type
  23367. */
  23368. __STATIC_FORCEINLINE long __RV_DSMAXDA16(long long t, unsigned long long a, unsigned long long b)
  23369. {
  23370. __ASM volatile("dsmaxda16 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23371. return (long)t;
  23372. }
  23373. /* ===== Inline Function End for DSMAXDA16 ===== */
  23374. /* ===== Inline Function Start for DKSMS32.u ===== */
  23375. /**
  23376. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23377. * \brief DKSMS32.u (Two Signed Multiply Shift-clip and Saturation with Rounding)
  23378. * \details
  23379. * **Type**: SIMD
  23380. *
  23381. * **Syntax**:\n
  23382. * ~~~
  23383. * DKSMS32.u Rd, Rs1, Rs2
  23384. * ~~~
  23385. *
  23386. * **Purpose**:\n
  23387. * Computes saturated multiplication of two pairs of q31 type with shifted rounding.
  23388. *
  23389. * **Description**:\n
  23390. * Compute the multiplication of Rs1 and Rs2 of type q31_t, intercept [47:16] for the resulting 64-bit product
  23391. * to get the 32-bit number, then add 1 to it to do rounding, and finally saturate the result after rounding.
  23392. *
  23393. * **Operations**:\n
  23394. * ~~~
  23395. * Mres[x][63:0] = Rs1.W[x] s* Rs2.W[x];
  23396. * Round[x][32:0] = Mres[x][47:15] + 1;
  23397. * Rd.W[x] = sat.31(Rd.W[x] + Round[x][32:1]);
  23398. * x=1...0
  23399. * ~~~
  23400. *
  23401. * \param [in] t unsigned long long type of value stored in t
  23402. * \param [in] a unsigned long long type of value stored in a
  23403. * \param [in] b unsigned long long type of value stored in b
  23404. * \return value stored in unsigned long long type
  23405. */
  23406. __STATIC_FORCEINLINE unsigned long long __RV_DKSMS32_U(unsigned long long t, unsigned long long a, unsigned long long b)
  23407. {
  23408. __ASM volatile("dksms32.u %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23409. return t;
  23410. }
  23411. /* ===== Inline Function End for DKSMS32.u ===== */
  23412. /* ===== Inline Function Start for DMADA32 ===== */
  23413. /**
  23414. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23415. * \brief DMADA32 (Two Cross Signed 32x32 with 64-bit Add and Clip to 32-bit)
  23416. * \details
  23417. * **Type**: SIMD
  23418. *
  23419. * **Syntax**:\n
  23420. * ~~~
  23421. * DMADA32 Rd, Rs1, Rs2
  23422. * ~~~
  23423. *
  23424. * **Purpose**:\n
  23425. * Do two cross signed 32x32 and add the signed multiplication results to q63, then clip the q63 result to q31, the final results
  23426. * are written into Rd.
  23427. *
  23428. * **Description**:\n
  23429. * For the `DMADA32` instruction, it multiplies the top 32-bit element in Rs1 with the bottom 32-bit element in Rs2 and
  23430. * then adds the result to the result of multiplying the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2, then
  23431. * clip the q63 result to q31.
  23432. *
  23433. * **Operations**:\n
  23434. * ~~~
  23435. * res = (q31_t)((((q63_t) Rd.w[0] << 32) + (q63_t)Rs1.w[0] s* Rs2.w[1] + (q63_t)Rs1.w[1] s* Rs2.w[0]) s>> 32);
  23436. * rd = res;
  23437. * ~~~
  23438. *
  23439. * \param [in] t long long type of value stored in t
  23440. * \param [in] a unsigned long long type of value stored in a
  23441. * \param [in] b unsigned long long type of value stored in b
  23442. * \return value stored in long type
  23443. */
  23444. __STATIC_FORCEINLINE long __RV_DMADA32(long long t, unsigned long long a, unsigned long long b)
  23445. {
  23446. __ASM volatile("dmada32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23447. return (long)t;
  23448. }
  23449. /* ===== Inline Function End for DMADA32 ===== */
  23450. /* ===== Inline Function Start for DSMALBB ===== */
  23451. /**
  23452. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23453. * \brief DSMALBB (Signed Multiply Bottom Halfs & Add 64-bit)
  23454. * \details
  23455. * **Type**: SIMD
  23456. *
  23457. * **Syntax**:\n
  23458. * ~~~
  23459. * DSMALBB Rd, Rs1, Rs2
  23460. * ~~~
  23461. *
  23462. * **Purpose**:\n
  23463. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
  23464. * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
  23465. * is written back to the register-pair.
  23466. * * DSMALBB: rt pair + bottom*bottom (all 32-bit elements)
  23467. *
  23468. * **Description**:\n
  23469. * For the `DSMALBB` instruction, it multiplies the bottom 16-bit content of Rs1 with the bottom 16-bit content of Rs2. The
  23470. * multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd.
  23471. *
  23472. * **Operations**:\n
  23473. * ~~~
  23474. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[0];
  23475. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[0];
  23476. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  23477. * ~~~
  23478. *
  23479. * \param [in] t long long type of value stored in t
  23480. * \param [in] a unsigned long long type of value stored in a
  23481. * \param [in] b unsigned long long type of value stored in b
  23482. * \return value stored in long long type
  23483. */
  23484. __STATIC_FORCEINLINE long long __RV_DSMALBB(long long t, unsigned long long a, unsigned long long b)
  23485. {
  23486. __ASM volatile("dsmalbb %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23487. return t;
  23488. }
  23489. /* ===== Inline Function End for DSMALBB ===== */
  23490. /* ===== Inline Function Start for DSMALBT ===== */
  23491. /**
  23492. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23493. * \brief DSMALBT (Signed Multiply Bottom Half & Top Half & Add 64-bit)
  23494. * \details
  23495. * **Type**: SIMD
  23496. *
  23497. * **Syntax**:\n
  23498. * ~~~
  23499. * DSMALBT Rd, Rs1, Rs2
  23500. * ~~~
  23501. *
  23502. * **Purpose**:\n
  23503. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
  23504. * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
  23505. * is written back to the register-pair.
  23506. * * DSMALBT: rt pair + bottom*top (all 32-bit elements)
  23507. *
  23508. * **Description**:\n
  23509. * For the `DSMALBT` instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  23510. * content of the 32-bit elements of Rs2.
  23511. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  23512. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
  23513. *
  23514. * **Operations**:\n
  23515. * ~~~
  23516. * Mres[0][31:0] = Rs1.W[0].H[0] * Rs2.W[0].H[1];
  23517. * Mres[1][31:0] = Rs1.W[1].H[0] * Rs2.W[1].H[1];
  23518. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  23519. * ~~~
  23520. *
  23521. * \param [in] t long long type of value stored in t
  23522. * \param [in] a unsigned long long type of value stored in a
  23523. * \param [in] b unsigned long long type of value stored in b
  23524. * \return value stored in long long type
  23525. */
  23526. __STATIC_FORCEINLINE long long __RV_DSMALBT(long long t, unsigned long long a, unsigned long long b)
  23527. {
  23528. __ASM volatile("dsmalbt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23529. return t;
  23530. }
  23531. /* ===== Inline Function End for DSMALBT ===== */
  23532. /* ===== Inline Function Start for DSMALTT ===== */
  23533. /**
  23534. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23535. * \brief DSMALTT (Signed Multiply Top Half & Add 64-bit)
  23536. * \details
  23537. * **Type**: SIMD
  23538. *
  23539. * **Syntax**:\n
  23540. * ~~~
  23541. * DSMALTT Rd, Rs1, Rs2
  23542. * ~~~
  23543. *
  23544. * **Purpose**:\n
  23545. * Multiply the signed 16-bit content of the 32-bit elements of a register with the 16-bit content of the corresponding 32-bit
  23546. * elements of another register and add the results with a 64-bit value of an even/odd pair of registers. The addition result
  23547. * is written back to the register-pair.
  23548. * * DSMALTT: rt pair + top*top (all 32-bit elements)
  23549. *
  23550. * **Description**:\n
  23551. * For the `DSMALTT` instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit
  23552. * content of the 32-bit elements of Rs2.
  23553. * The multiplication results are added with the 64-bit value of Rd. The 64-bit addition result is written back to Rd. The
  23554. * 16-bit values of Rs1 and Rs2, and the 64-bit value of Rd are treated as signed integers.
  23555. *
  23556. * **Operations**:\n
  23557. * ~~~
  23558. * Mres[0][31:0] = Rs1.W[0].H[1] * Rs2.W[0].H[1];
  23559. * Mres[1][31:0] = Rs1.W[1].H[1] * Rs2.W[1].H[1];
  23560. * Rd = Rd + SE64(Mres[0][31:0]) + SE64(Mres[1][31:0]);
  23561. * ~~~
  23562. *
  23563. * \param [in] t long long type of value stored in t
  23564. * \param [in] a unsigned long long type of value stored in a
  23565. * \param [in] b unsigned long long type of value stored in b
  23566. * \return value stored in long long type
  23567. */
  23568. __STATIC_FORCEINLINE long long __RV_DSMALTT(long long t, unsigned long long a, unsigned long long b)
  23569. {
  23570. __ASM volatile("dsmaltt %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23571. return t;
  23572. }
  23573. /* ===== Inline Function End for DSMALTT ===== */
  23574. /* ===== Inline Function Start for DKMABB32 ===== */
  23575. /**
  23576. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23577. * \brief DKMABB32 (Saturating Signed Multiply Bottom Words & Add)
  23578. * \details
  23579. * **Type**: SIMD
  23580. *
  23581. * **Syntax**:\n
  23582. * ~~~
  23583. * DKMABB32 Rd, Rs1, Rs2
  23584. * ~~~
  23585. *
  23586. * **Purpose**:\n
  23587. * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
  23588. * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
  23589. * * DKMABB32: rd + bottom*bottom
  23590. *
  23591. * **Description**:\n
  23592. * For the `DKMABB32` instruction, it multiplies the bottom 32-bit element in Rs1 with the bottom 32-bit element in Rs2.
  23593. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
  23594. * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
  23595. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  23596. *
  23597. * **Operations**:\n
  23598. * ~~~
  23599. * res = Rd + (Rs1.W[0] * Rs2.W[0]);
  23600. * if (res > (2^63)-1) {
  23601. * res = (2^63)-1;
  23602. * OV = 1;
  23603. * } else if (res < -2^63) {
  23604. * res = -2^63;
  23605. * OV = 1;
  23606. * }
  23607. * Rd = res;
  23608. * ~~~
  23609. *
  23610. * \param [in] t long long type of value stored in t
  23611. * \param [in] a unsigned long long type of value stored in a
  23612. * \param [in] b unsigned long long type of value stored in b
  23613. * \return value stored in long long type
  23614. */
  23615. __STATIC_FORCEINLINE long long __RV_DKMABB32(long long t, unsigned long long a, unsigned long long b)
  23616. {
  23617. __ASM volatile("dkmabb32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23618. return t;
  23619. }
  23620. /* ===== Inline Function End for DKMABB32 ===== */
  23621. /* ===== Inline Function Start for DKMABT32 ===== */
  23622. /**
  23623. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23624. * \brief DKMABT32 (Saturating Signed Multiply Bottom & Top Words & Add)
  23625. * \details
  23626. * **Type**: SIMD
  23627. *
  23628. * **Syntax**:\n
  23629. * ~~~
  23630. * DKMABT32 Rd, Rs1, Rs2
  23631. * ~~~
  23632. *
  23633. * **Purpose**:\n
  23634. * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
  23635. * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
  23636. * * DKMABT32: rd + bottom*top
  23637. *
  23638. * **Description**:\n
  23639. * For the `DKMABT32` instruction, it multiplies the bottom 32-bit element in Rs1 with the top 32-bit element in Rs2.
  23640. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
  23641. * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
  23642. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  23643. *
  23644. * **Operations**:\n
  23645. * ~~~
  23646. * res = Rd + (Rs1.W[0] * Rs2.W[1]);
  23647. * if (res > (2^63)-1) {
  23648. * res = (2^63)-1;
  23649. * OV = 1;
  23650. * } else if (res < -2^63) {
  23651. * res = -2^63;
  23652. * OV = 1;
  23653. * }
  23654. * Rd = res;
  23655. * ~~~
  23656. *
  23657. * \param [in] t long long type of value stored in t
  23658. * \param [in] a unsigned long long type of value stored in a
  23659. * \param [in] b unsigned long long type of value stored in b
  23660. * \return value stored in long long type
  23661. */
  23662. __STATIC_FORCEINLINE long long __RV_DKMABT32(long long t, unsigned long long a, unsigned long long b)
  23663. {
  23664. __ASM volatile("dkmabt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23665. return t;
  23666. }
  23667. /* ===== Inline Function End for DKMABT32 ===== */
  23668. /* ===== Inline Function Start for DKMATT32 ===== */
  23669. /**
  23670. * \ingroup NMSIS_Core_DSP_Intrinsic_NUCLEI_N3
  23671. * \brief DKMATT32 (Saturating Signed Multiply Top Words & Add)
  23672. * \details
  23673. * **Type**: SIMD
  23674. *
  23675. * **Syntax**:\n
  23676. * ~~~
  23677. * DKMATT32 Rd, Rs1, Rs2
  23678. * ~~~
  23679. *
  23680. * **Purpose**:\n
  23681. * Multiply the signed 32-bit element in a register with the 32-bit element in another register and add the result to the content
  23682. * of 64-bit data in the third register. The addition result may be saturated and is written to the third register.
  23683. * * DKMATT32: rd + top*top
  23684. *
  23685. * **Description**:\n
  23686. * For the `DKMATT32` instruction, it multiplies the top 32-bit element in Rs1 with the top 32-bit element in Rs2.
  23687. * The multiplication result is added to the content of 64-bit data in Rd. If the addition result is beyond the Q63 number range
  23688. * (-2^63 <= Q63 <= 2^63-1), it is saturated to the range and the OV bit is set to 1. The result after saturation is written to Rd.
  23689. * The 32-bit contents of Rs1 and Rs2 are treated as signed integers.
  23690. *
  23691. * **Operations**:\n
  23692. * ~~~
  23693. * res = Rd + (Rs1.W[1] * Rs2.W[1]);
  23694. * if (res > (2^63)-1) {
  23695. * res = (2^63)-1;
  23696. * OV = 1;
  23697. * } else if (res < -2^63) {
  23698. * res = -2^63;
  23699. * OV = 1;
  23700. * }
  23701. * Rd = res;
  23702. * ~~~
  23703. *
  23704. * \param [in] t long long type of value stored in t
  23705. * \param [in] a unsigned long long type of value stored in a
  23706. * \param [in] b unsigned long long type of value stored in b
  23707. * \return value stored in long long type
  23708. */
  23709. __STATIC_FORCEINLINE long long __RV_DKMATT32(long long t, unsigned long long a, unsigned long long b)
  23710. {
  23711. __ASM volatile("dkmatt32 %0, %1, %2" : "+r"(t) : "r"(a), "r"(b));
  23712. return t;
  23713. }
  23714. /* ===== Inline Function End for DKMATT32 ===== */
  23715. #endif /* __RISCV_XLEN == 32 */
  23716. #elif defined (__ICCRISCV__)
  23717. #if __riscv_xlen == 32
  23718. #include "iar_nds32_intrinsic.h"
  23719. #elif __riscv_xlen == 64
  23720. #include "iar_nds64_intrinsic.h"
  23721. #else
  23722. #error "Unexpected RISC-V XLEN size."
  23723. #endif /* __riscv_xlen == 32 */
  23724. #pragma language=save
  23725. #pragma language=extended
  /*
   * Map the __RV_<NAME> spellings used by this header onto the compatible
   * __nds__<name> intrinsics supplied by IAR. Each macro is a plain token
   * alias; arguments are passed through untouched.
   */

  /* CLROV / RDOV */
  #define __RV_CLROV      __nds__clrov
  #define __RV_RDOV       __nds__rdov

  /* ADDn / SUBn */
  #define __RV_ADD8       __nds__add8
  #define __RV_SUB8       __nds__sub8
  #define __RV_ADD16      __nds__add16
  #define __RV_SUB16      __nds__sub16
  #define __RV_ADD64      __nds__add64
  #define __RV_SUB64      __nds__sub64

  /* R-prefixed ADD / SUB */
  #define __RV_RADD8      __nds__radd8
  #define __RV_RSUB8      __nds__rsub8
  #define __RV_RADD16     __nds__radd16
  #define __RV_RSUB16     __nds__rsub16
  #define __RV_RADD64     __nds__radd64
  #define __RV_RSUB64     __nds__rsub64
  #define __RV_RADDW      __nds__raddw
  #define __RV_RSUBW      __nds__rsubw

  /* UR-prefixed ADD / SUB */
  #define __RV_URADD8     __nds__uradd8
  #define __RV_URSUB8     __nds__ursub8
  #define __RV_URADD16    __nds__uradd16
  #define __RV_URSUB16    __nds__ursub16
  #define __RV_URADD64    __nds__uradd64
  #define __RV_URSUB64    __nds__ursub64
  #define __RV_URADDW     __nds__uraddw
  #define __RV_URSUBW     __nds__ursubw

  /* K-prefixed ADD / SUB */
  #define __RV_KADD8      __nds__kadd8
  #define __RV_KSUB8      __nds__ksub8
  #define __RV_KADD16     __nds__kadd16
  #define __RV_KSUB16     __nds__ksub16
  #define __RV_KADD64     __nds__kadd64
  #define __RV_KSUB64     __nds__ksub64
  #define __RV_KADDH      __nds__kaddh
  #define __RV_KSUBH      __nds__ksubh
  #define __RV_KADDW      __nds__kaddw
  #define __RV_KSUBW      __nds__ksubw

  /* UK-prefixed ADD / SUB */
  #define __RV_UKADD8     __nds__ukadd8
  #define __RV_UKSUB8     __nds__uksub8
  #define __RV_UKADD16    __nds__ukadd16
  #define __RV_UKSUB16    __nds__uksub16
  #define __RV_UKADD64    __nds__ukadd64
  #define __RV_UKSUB64    __nds__uksub64
  #define __RV_UKADDH     __nds__ukaddh
  #define __RV_UKSUBH     __nds__uksubh
  #define __RV_UKADDW     __nds__ukaddw
  #define __RV_UKSUBW     __nds__uksubw

  /* CRAS16 / CRSA16 and their prefixed variants */
  #define __RV_CRAS16     __nds__cras16
  #define __RV_CRSA16     __nds__crsa16
  #define __RV_RCRAS16    __nds__rcras16
  #define __RV_RCRSA16    __nds__rcrsa16
  #define __RV_URCRAS16   __nds__urcras16
  #define __RV_URCRSA16   __nds__urcrsa16
  #define __RV_KCRAS16    __nds__kcras16
  #define __RV_KCRSA16    __nds__kcrsa16
  #define __RV_UKCRAS16   __nds__ukcras16
  #define __RV_UKCRSA16   __nds__ukcrsa16
  /* SRA / SRL / SLL. The SRAI8 and SRAI16 names resolve to the same IAR
   * intrinsic as SRA8 and SRA16 respectively. */
  #define __RV_SRA8       __nds__sra8
  #define __RV_SRAI8      __nds__sra8
  #define __RV_SRA16      __nds__sra16
  #define __RV_SRAI16     __nds__sra16
  #define __RV_SRL8       __nds__srl8
  #define __RV_SRL16      __nds__srl16
  #define __RV_SLL8       __nds__sll8
  #define __RV_SLL16      __nds__sll16

  /* _U-suffixed SRA / SRL */
  #define __RV_SRA_U      __nds__sra_u
  #define __RV_SRA8_U     __nds__sra8_u
  #define __RV_SRA16_U    __nds__sra16_u
  #define __RV_SRL8_U     __nds__srl8_u
  #define __RV_SRL16_U    __nds__srl16_u

  /* KSLL / KSLRA */
  #define __RV_KSLL8      __nds__ksll8
  #define __RV_KSLL16     __nds__ksll16
  #define __RV_KSLLW      __nds__ksllw
  #define __RV_KSLRA8     __nds__kslra8
  #define __RV_KSLRA8_U   __nds__kslra8_u
  #define __RV_KSLRA16    __nds__kslra16
  #define __RV_KSLRA16_U  __nds__kslra16_u
  #define __RV_KSLRAW     __nds__kslraw
  #define __RV_KSLRAW_U   __nds__kslraw_u

  /* CMPEQ / SCMPxx / UCMPxx */
  #define __RV_CMPEQ8     __nds__cmpeq8
  #define __RV_CMPEQ16    __nds__cmpeq16
  #define __RV_SCMPLE8    __nds__scmple8
  #define __RV_SCMPLE16   __nds__scmple16
  #define __RV_SCMPLT8    __nds__scmplt8
  #define __RV_SCMPLT16   __nds__scmplt16
  #define __RV_UCMPLE8    __nds__ucmple8
  #define __RV_UCMPLE16   __nds__ucmple16
  #define __RV_UCMPLT8    __nds__ucmplt8
  #define __RV_UCMPLT16   __nds__ucmplt16
  /* SMUL / UMUL / SMULX / UMULX */
  #define __RV_SMUL8      __nds__smul8
  #define __RV_UMUL8      __nds__umul8
  #define __RV_SMUL16     __nds__smul16
  #define __RV_UMUL16     __nds__umul16
  #define __RV_SMULX8     __nds__smulx8
  #define __RV_UMULX8     __nds__umulx8
  #define __RV_SMULX16    __nds__smulx16
  #define __RV_UMULX16    __nds__umulx16

  /* KHM / KHMX */
  #define __RV_KHM8       __nds__khm8
  #define __RV_KHMX8      __nds__khmx8
  #define __RV_KHM16      __nds__khm16
  #define __RV_KHMX16     __nds__khmx16

  /* MULR64 / MULSR64 / SMMUL / WEXT */
  #define __RV_MULR64     __nds__mulr64
  #define __RV_MULSR64    __nds__mulsr64
  #define __RV_SMMUL      __nds__smmul
  #define __RV_SMMUL_U    __nds__smmul_u
  #define __RV_WEXT       __nds__wext

  /* SUNPKD8xx / ZUNPKD8xx */
  #define __RV_SUNPKD810  __nds__sunpkd810
  #define __RV_SUNPKD820  __nds__sunpkd820
  #define __RV_SUNPKD830  __nds__sunpkd830
  #define __RV_SUNPKD831  __nds__sunpkd831
  #define __RV_SUNPKD832  __nds__sunpkd832
  #define __RV_ZUNPKD810  __nds__zunpkd810
  #define __RV_ZUNPKD820  __nds__zunpkd820
  #define __RV_ZUNPKD830  __nds__zunpkd830
  #define __RV_ZUNPKD831  __nds__zunpkd831
  #define __RV_ZUNPKD832  __nds__zunpkd832

  /* PKxx16 */
  #define __RV_PKBB16     __nds__pkbb16
  #define __RV_PKBT16     __nds__pkbt16
  #define __RV_PKTT16     __nds__pktt16
  #define __RV_PKTB16     __nds__pktb16

  /* KMMAC / KMMSB / KWMMUL */
  #define __RV_KMMAC      __nds__kmmac
  #define __RV_KMMAC_U    __nds__kmmac_u
  #define __RV_KMMSB      __nds__kmmsb
  #define __RV_KMMSB_U    __nds__kmmsb_u
  #define __RV_KWMMUL     __nds__kwmmul
  #define __RV_KWMMUL_U   __nds__kwmmul_u

  /* SMMWx / KMMAWx / KMMWx2 / KMMAWx2 */
  #define __RV_SMMWB      __nds__smmwb
  #define __RV_SMMWB_U    __nds__smmwb_u
  #define __RV_SMMWT      __nds__smmwt
  #define __RV_SMMWT_U    __nds__smmwt_u
  #define __RV_KMMAWB     __nds__kmmawb
  #define __RV_KMMAWB_U   __nds__kmmawb_u
  #define __RV_KMMAWT     __nds__kmmawt
  #define __RV_KMMAWT_U   __nds__kmmawt_u
  #define __RV_KMMWB2     __nds__kmmwb2
  #define __RV_KMMWB2_U   __nds__kmmwb2_u
  #define __RV_KMMWT2     __nds__kmmwt2
  #define __RV_KMMWT2_U   __nds__kmmwt2_u
  #define __RV_KMMAWB2    __nds__kmmawb2
  #define __RV_KMMAWB2_U  __nds__kmmawb2_u
  #define __RV_KMMAWT2    __nds__kmmawt2
  #define __RV_KMMAWT2_U  __nds__kmmawt2_u
  /* SMxx16 / KMDA / KMXDA / SMDS / SMDRS / SMXDS */
  #define __RV_SMBB16     __nds__smbb16
  #define __RV_SMBT16     __nds__smbt16
  #define __RV_SMTT16     __nds__smtt16
  #define __RV_KMDA       __nds__kmda
  #define __RV_KMXDA      __nds__kmxda
  #define __RV_SMDS       __nds__smds
  #define __RV_SMDRS      __nds__smdrs
  #define __RV_SMXDS      __nds__smxds

  /* KMAxx / KMADx / KMSxx */
  #define __RV_KMABB      __nds__kmabb
  #define __RV_KMABT      __nds__kmabt
  #define __RV_KMATT      __nds__kmatt
  #define __RV_KMADA      __nds__kmada
  #define __RV_KMAXDA     __nds__kmaxda
  #define __RV_KMADS      __nds__kmads
  #define __RV_KMADRS     __nds__kmadrs
  #define __RV_KMAXDS     __nds__kmaxds
  #define __RV_KMSDA      __nds__kmsda
  #define __RV_KMSXDA     __nds__kmsxda

  /* SMAL / SMAQA / UMAQA */
  #define __RV_SMAL       __nds__smal
  #define __RV_SMAQA      __nds__smaqa
  #define __RV_UMAQA      __nds__umaqa
  #define __RV_SMAQA_SU   __nds__smaqa_su

  /* xMAR64 / xMSR64 */
  #define __RV_SMAR64     __nds__smar64
  #define __RV_SMSR64     __nds__smsr64
  #define __RV_UMAR64     __nds__umar64
  #define __RV_UMSR64     __nds__umsr64
  #define __RV_KMAR64     __nds__kmar64
  #define __RV_KMSR64     __nds__kmsr64
  #define __RV_UKMAR64    __nds__ukmar64
  #define __RV_UKMSR64    __nds__ukmsr64

  /* SMALxx / SMSLxx */
  #define __RV_SMALBB     __nds__smalbb
  #define __RV_SMALBT     __nds__smalbt
  #define __RV_SMALTT     __nds__smaltt
  #define __RV_SMALDA     __nds__smalda
  #define __RV_SMALXDA    __nds__smalxda
  #define __RV_SMALDS     __nds__smalds
  #define __RV_SMALDRS    __nds__smaldrs
  #define __RV_SMALXDS    __nds__smalxds
  #define __RV_SMSLDA     __nds__smslda
  #define __RV_SMSLXDA    __nds__smslxda
  /* MIN / MAX */
  #define __RV_MINW       __nds__minw
  #define __RV_MAXW       __nds__maxw
  #define __RV_SMIN8      __nds__smin8
  #define __RV_SMAX8      __nds__smax8
  #define __RV_SMIN16     __nds__smin16
  #define __RV_SMAX16     __nds__smax16
  #define __RV_UMIN8      __nds__umin8
  #define __RV_UMAX8      __nds__umax8
  #define __RV_UMIN16     __nds__umin16
  #define __RV_UMAX16     __nds__umax16

  /* KABS */
  #define __RV_KABS8      __nds__kabs8
  #define __RV_KABS16     __nds__kabs16
  #define __RV_KABSW      __nds__kabsw

  /* SCLIP / UCLIP */
  #define __RV_SCLIP8     __nds__sclip8
  #define __RV_SCLIP16    __nds__sclip16
  #define __RV_SCLIP32    __nds__sclip32
  #define __RV_UCLIP8     __nds__uclip8
  #define __RV_UCLIP16    __nds__uclip16
  #define __RV_UCLIP32    __nds__uclip32

  /* CLO / CLZ / CLRS */
  #define __RV_CLO8       __nds__clo8
  #define __RV_CLO16      __nds__clo16
  #define __RV_CLO32      __nds__clo32
  #define __RV_CLZ8       __nds__clz8
  #define __RV_CLZ16      __nds__clz16
  #define __RV_CLZ32      __nds__clz32
  #define __RV_CLRS8      __nds__clrs8
  #define __RV_CLRS16     __nds__clrs16
  #define __RV_CLRS32     __nds__clrs32

  /* SWAP */
  #define __RV_SWAP8      __nds__swap8
  #define __RV_SWAP16     __nds__swap16

  /* KHMxx / KDMxx / KDMAxx */
  #define __RV_KHMBB      __nds__khmbb
  #define __RV_KHMBT      __nds__khmbt
  #define __RV_KHMTT      __nds__khmtt
  #define __RV_KDMBB      __nds__kdmbb
  #define __RV_KDMBT      __nds__kdmbt
  #define __RV_KDMTT      __nds__kdmtt
  #define __RV_KDMABB     __nds__kdmabb
  #define __RV_KDMABT     __nds__kdmabt
  #define __RV_KDMATT     __nds__kdmatt

  /* MADDR32 / MSUBR32 / PBSAD / PBSADA / AVE / BITREV / INSB */
  #define __RV_MADDR32    __nds__maddr32
  #define __RV_MSUBR32    __nds__msubr32
  #define __RV_PBSAD      __nds__pbsad
  #define __RV_PBSADA     __nds__pbsada
  #define __RV_AVE        __nds__ave
  #define __RV_BITREV     __nds__bitrev
  #define __RV_INSB       __nds__insb
  /* The aliases below are only defined when __riscv_xlen is 64. */
  #if (__riscv_xlen == 64)

  /* ADD32 / SUB32 and their prefixed variants */
  #define __RV_ADD32      __nds__add32
  #define __RV_SUB32      __nds__sub32
  #define __RV_RADD32     __nds__radd32
  #define __RV_RSUB32     __nds__rsub32
  #define __RV_URADD32    __nds__uradd32
  #define __RV_URSUB32    __nds__ursub32
  #define __RV_KADD32     __nds__kadd32
  #define __RV_KSUB32     __nds__ksub32
  #define __RV_UKADD32    __nds__ukadd32
  #define __RV_UKSUB32    __nds__uksub32

  /* CRAS32 / CRSA32 and their prefixed variants */
  #define __RV_CRAS32     __nds__cras32
  #define __RV_CRSA32     __nds__crsa32
  #define __RV_RCRAS32    __nds__rcras32
  #define __RV_RCRSA32    __nds__rcrsa32
  #define __RV_URCRAS32   __nds__urcras32
  #define __RV_URCRSA32   __nds__urcrsa32
  #define __RV_KCRAS32    __nds__kcras32
  #define __RV_KCRSA32    __nds__kcrsa32
  #define __RV_UKCRAS32   __nds__ukcras32
  #define __RV_UKCRSA32   __nds__ukcrsa32

  /* Shifts. SRAI32 shares __nds__sra32 with SRA32, and SLLI32 shares
   * __nds__sll32 with SLL32. */
  #define __RV_SRA32      __nds__sra32
  #define __RV_SRAI32     __nds__sra32
  #define __RV_SRL32      __nds__srl32
  #define __RV_SLL32      __nds__sll32
  #define __RV_SLLI32     __nds__sll32
  #define __RV_SRAW_U     __nds__sraw_u
  #define __RV_SRA32_U    __nds__sra32_u
  #define __RV_SRL32_U    __nds__srl32_u
  #define __RV_KSLL32     __nds__ksll32
  #define __RV_KSLRA32    __nds__kslra32
  #define __RV_KSLRA32_U  __nds__kslra32_u

  /* SMxx32 / PKxx32 */
  #define __RV_SMBB32     __nds__smbb32
  #define __RV_SMBT32     __nds__smbt32
  #define __RV_SMTT32     __nds__smtt32
  #define __RV_PKBB32     __nds__pkbb32
  #define __RV_PKBT32     __nds__pkbt32
  #define __RV_PKTT32     __nds__pktt32
  #define __RV_PKTB32     __nds__pktb32

  /* MIN32 / MAX32 / KABS32 */
  #define __RV_SMIN32     __nds__smin32
  #define __RV_SMAX32     __nds__smax32
  #define __RV_UMIN32     __nds__umin32
  #define __RV_UMAX32     __nds__umax32
  #define __RV_KABS32     __nds__kabs32

  /* KHMxx16 / KDMxx16 / KDMAxx16 */
  #define __RV_KHMBB16    __nds__khmbb16
  #define __RV_KHMBT16    __nds__khmbt16
  #define __RV_KHMTT16    __nds__khmtt16
  #define __RV_KDMBB16    __nds__kdmbb16
  #define __RV_KDMBT16    __nds__kdmbt16
  #define __RV_KDMTT16    __nds__kdmtt16
  #define __RV_KDMABB16   __nds__kdmabb16
  #define __RV_KDMABT16   __nds__kdmabt16
  #define __RV_KDMATT16   __nds__kdmatt16

  /* KMAxx32 / KMDA32 / KMADx32 / KMSxx32 / SMDx32 */
  #define __RV_KMABB32    __nds__kmabb32
  #define __RV_KMABT32    __nds__kmabt32
  #define __RV_KMATT32    __nds__kmatt32
  #define __RV_KMDA32     __nds__kmda32
  #define __RV_KMXDA32    __nds__kmxda32
  #define __RV_KMADA32    __nds__kmada32
  #define __RV_KMAXDA32   __nds__kmaxda32
  #define __RV_KMADS32    __nds__kmads32
  #define __RV_KMADRS32   __nds__kmadrs32
  #define __RV_KMAXDS32   __nds__kmaxds32
  #define __RV_KMSDA32    __nds__kmsda32
  #define __RV_KMSXDA32   __nds__kmsxda32
  #define __RV_SMDS32     __nds__smds32
  #define __RV_SMDRS32    __nds__smdrs32
  #define __RV_SMXDS32    __nds__smxds32

  #endif /* __riscv_xlen == 64 */
// The IAR IDE currently supports P-extension version 0.5.0, whereas Nuclei supports 0.5.4,
// so Nuclei supplies a workaround that adds the custom instructions which are not natively
// supported by the IAR assembler. Note that __RV_BPICK remains to be implemented in the future.
// Only the Nuclei Xxldsp custom instruction set is implemented here: bpick is not implemented,
// and the expdxx functions are implemented in C rather than via an .insn variant.
  24026. #pragma inline=forced_no_body
  24027. unsigned long __RV_STAS16(unsigned long a, unsigned long b) {
  24028. unsigned long r;
  24029. __asm(".insn r 0x7F, 0x2, 0x7A, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24030. return r;
  24031. }
  24032. #pragma inline=forced_no_body
  24033. unsigned long __RV_RSTAS16(unsigned long a, unsigned long b) {
  24034. unsigned long r;
  24035. __asm(".insn r 0x7F, 0x2, 0x5A, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24036. return r;
  24037. }
  24038. #pragma inline=forced_no_body
  24039. unsigned long __RV_KSTAS16(unsigned long a, unsigned long b) {
  24040. unsigned long r;
  24041. __asm(".insn r 0x7F, 0x2, 0x62, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24042. return r;
  24043. }
  24044. #pragma inline=forced_no_body
  24045. unsigned long __RV_URSTAS16(unsigned long a, unsigned long b) {
  24046. unsigned long r;
  24047. __asm(".insn r 0x7F, 0x2, 0x6A, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24048. return r;
  24049. }
  24050. #pragma inline=forced_no_body
  24051. unsigned long __RV_UKSTAS16(unsigned long a, unsigned long b) {
  24052. unsigned long r;
  24053. __asm(".insn r 0x7F, 0x2, 0x72, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24054. return r;
  24055. }
  24056. #pragma inline=forced_no_body
  24057. unsigned long __RV_STSA16(unsigned long a, unsigned long b) {
  24058. unsigned long r;
  24059. __asm(".insn r 0x7F, 0x2, 0x7B, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24060. return r;
  24061. }
  24062. #pragma inline=forced_no_body
  24063. unsigned long __RV_RSTSA16(unsigned long a, unsigned long b) {
  24064. unsigned long r;
  24065. __asm(".insn r 0x7F, 0x2, 0x5B, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24066. return r;
  24067. }
  24068. #pragma inline=forced_no_body
  24069. unsigned long __RV_KSTSA16(unsigned long a, unsigned long b) {
  24070. unsigned long r;
  24071. __asm(".insn r 0x7F, 0x2, 0x63, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24072. return r;
  24073. }
  24074. #pragma inline=forced_no_body
  24075. unsigned long __RV_URSTSA16(unsigned long a, unsigned long b) {
  24076. unsigned long r;
  24077. __asm(".insn r 0x7F, 0x2, 0x6B, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24078. return r;
  24079. }
  24080. #pragma inline=forced_no_body
  24081. unsigned long __RV_UKSTSA16(unsigned long a, unsigned long b) {
  24082. unsigned long r;
  24083. __asm(".insn r 0x7F, 0x2, 0x73, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24084. return r;
  24085. }
  24086. // #pragma inline=forced_no_body
  24087. // unsigned long __RV_BPICK(unsigned long a, unsigned long b, unsigned long c) {
  24088. // TODO: remains to be done
  24089. // }
  24090. // RV64 only
  24091. #pragma inline=forced_no_body
  24092. unsigned long __RV_STAS32(unsigned long a, unsigned long b) {
  24093. unsigned long r;
  24094. __asm(".insn r 0x7F, 0x2, 0x78, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24095. return r;
  24096. }
  24097. #pragma inline=forced_no_body
  24098. unsigned long __RV_RSTAS32(unsigned long a, unsigned long b) {
  24099. unsigned long r;
  24100. __asm(".insn r 0x7F, 0x2, 0x58, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24101. return r;
  24102. }
  24103. #pragma inline=forced_no_body
  24104. unsigned long __RV_KSTAS32(unsigned long a, unsigned long b) {
  24105. unsigned long r;
  24106. __asm(".insn r 0x7F, 0x2, 0x60, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24107. return r;
  24108. }
  24109. #pragma inline=forced_no_body
  24110. unsigned long __RV_URSTAS32(unsigned long a, unsigned long b) {
  24111. unsigned long r;
  24112. __asm(".insn r 0x7F, 0x2, 0x68, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24113. return r;
  24114. }
  24115. #pragma inline=forced_no_body
  24116. unsigned long __RV_UKSTAS32(unsigned long a, unsigned long b) {
  24117. unsigned long r;
  24118. __asm(".insn r 0x7F, 0x2, 0x70, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24119. return r;
  24120. }
  24121. #pragma inline=forced_no_body
  24122. unsigned long __RV_STSA32(unsigned long a, unsigned long b) {
  24123. unsigned long r;
  24124. __asm(".insn r 0x7F, 0x2, 0x79, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24125. return r;
  24126. }
  24127. #pragma inline=forced_no_body
  24128. unsigned long __RV_RSTSA32(unsigned long a, unsigned long b) {
  24129. unsigned long r;
  24130. __asm(".insn r 0x7F, 0x2, 0x59, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24131. return r;
  24132. }
  24133. #pragma inline=forced_no_body
  24134. unsigned long __RV_KSTSA32(unsigned long a, unsigned long b) {
  24135. unsigned long r;
  24136. __asm(".insn r 0x7F, 0x2, 0x61, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24137. return r;
  24138. }
  24139. #pragma inline=forced_no_body
  24140. unsigned long __RV_URSTSA32(unsigned long a, unsigned long b) {
  24141. unsigned long r;
  24142. __asm(".insn r 0x7F, 0x2, 0x69, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24143. return r;
  24144. }
  24145. #pragma inline=forced_no_body
  24146. unsigned long __RV_UKSTSA32(unsigned long a, unsigned long b) {
  24147. unsigned long r;
  24148. __asm(".insn r 0x7F, 0x2, 0x71, %0,%1,%2":"=r"(r) : "r"(a), "r"(b) );
  24149. return r;
  24150. }
  24151. #pragma inline=forced_no_body
  24152. unsigned long __RV_EXPD80(unsigned long a)
  24153. {
  24154. return __EXPD_BYTE((uint8_t)(a & 0xff));
  24155. }
  24156. #pragma inline=forced_no_body
  24157. unsigned long __RV_EXPD81(unsigned long a)
  24158. {
  24159. return __EXPD_BYTE((uint8_t)((a >> 8) & 0xff));
  24160. }
  24161. #pragma inline=forced_no_body
  24162. unsigned long __RV_EXPD82(unsigned long a)
  24163. {
  24164. return __EXPD_BYTE((uint8_t)((a >> 16) & 0xff));
  24165. }
  24166. #pragma inline=forced_no_body
  24167. unsigned long __RV_EXPD83(unsigned long a)
  24168. {
  24169. return __EXPD_BYTE((uint8_t)((a >> 24) & 0xff));
  24170. }
  24171. #if __RISCV_XLEN == 64
  24172. // RV64 only
  24173. #pragma inline=forced_no_body
  24174. unsigned long __RV_EXPD84(unsigned long a)
  24175. {
  24176. return __EXPD_BYTE((uint8_t)((a >> 32) & 0xff));
  24177. }
  24178. #pragma inline=forced_no_body
  24179. unsigned long __RV_EXPD85(unsigned long a)
  24180. {
  24181. return __EXPD_BYTE((uint8_t)((a >> 40) & 0xff));
  24182. }
  24183. #pragma inline=forced_no_body
  24184. unsigned long __RV_EXPD86(unsigned long a)
  24185. {
  24186. return __EXPD_BYTE((uint8_t)((a >> 48) & 0xff));
  24187. }
  24188. #pragma inline=forced_no_body
  24189. unsigned long __RV_EXPD87(unsigned long a)
  24190. {
  24191. return __EXPD_BYTE((uint8_t)((a >> 56) & 0xff));
  24192. }
  24193. #endif
  24194. #pragma language=restore
  24195. #else
  24196. #error Unknown compiler
  24197. #endif /* __ICCRISCV__ */
/* XXXXX ARM Compatible SIMD API XXXXX */
  24199. /** \brief Q setting quad 8-bit saturating addition. */
  24200. #define __QADD8(x, y) __RV_KADD8(x, y)
  24201. /** \brief Q setting quad 8-bit saturating subtract. */
  24202. #define __QSUB8(x, y) __RV_KSUB8((x), (y))
  24203. /** \brief Q setting dual 16-bit saturating addition. */
  24204. #define __QADD16(x, y) __RV_KADD16((x), (y))
  24205. /** \brief Dual 16-bit signed addition with halved results. */
  24206. #define __SHADD16(x, y) __RV_RADD16((x), (y))
  24207. /** \brief Q setting dual 16-bit saturating subtract. */
  24208. #define __QSUB16(x, y) __RV_KSUB16((x), (y))
  24209. /** \brief Dual 16-bit signed subtraction with halved results. */
  24210. #define __SHSUB16(x, y) __RV_RSUB16((x), (y))
  24211. /** \brief Q setting dual 16-bit add and subtract with exchange. */
  24212. #define __QASX(x, y) __RV_KCRAS16((x), (y))
  24213. /** \brief Dual 16-bit signed addition and subtraction with halved results.*/
  24214. #define __SHASX(x, y) __RV_RCRAS16((x), (y))
  24215. /** \brief Q setting dual 16-bit subtract and add with exchange. */
  24216. #define __QSAX(x, y) __RV_KCRSA16((x), (y))
  24217. /** \brief Dual 16-bit signed subtraction and addition with halved results.*/
  24218. #define __SHSAX(x, y) __RV_RCRSA16((x), (y))
  24219. /** \brief Dual 16-bit signed multiply with exchange returning difference. */
  24220. #define __SMUSDX(x, y) __RV_SMXDS((y), (x))
  24221. /** \brief Q setting sum of dual 16-bit signed multiply with exchange. */
  24222. __STATIC_FORCEINLINE long __SMUADX (unsigned long op1, unsigned long op2)
  24223. {
  24224. return __RV_KMXDA(op1, op2);
  24225. }
  24226. /** \brief Q setting saturating add. */
  24227. #define __QADD(x, y) __RV_KADDW((x), (y))
  24228. /** \brief Q setting saturating subtract. */
  24229. #define __QSUB(x, y) __RV_KSUBW((x), (y))
  24230. /** \brief Q setting dual 16-bit signed multiply with single 32-bit accumulator. */
  24231. __STATIC_FORCEINLINE long __SMLAD(unsigned long op1, unsigned long op2, long acc)
  24232. {
  24233. return __RV_KMADA(acc, op1, op2);
  24234. }
  24235. /** \brief Q setting pre-exchanged dual 16-bit signed multiply with single 32-bit accumulator. */
  24236. __STATIC_FORCEINLINE long __SMLADX(unsigned long op1, unsigned long op2, long acc)
  24237. {
  24238. return __RV_KMAXDA(acc, op1, op2);
  24239. }
  24240. /** \brief Q setting dual 16-bit signed multiply with exchange subtract with 32-bit accumulate. */
  24241. __STATIC_FORCEINLINE long __SMLSDX(unsigned long op1, unsigned long op2, long acc)
  24242. {
  24243. return (acc - __RV_SMXDS(op1, op2));
  24244. }
  24245. /** \brief Dual 16-bit signed multiply with single 64-bit accumulator. */
  24246. __STATIC_FORCEINLINE long long __SMLALD(unsigned long op1, unsigned long op2, long long acc)
  24247. {
  24248. return __RV_SMALDA(acc, op1, op2);
  24249. }
  24250. /** \brief Dual 16-bit signed multiply with exchange with single 64-bit accumulator. */
  24251. __STATIC_FORCEINLINE long long __SMLALDX(unsigned long op1, unsigned long op2, long long acc)
  24252. {
  24253. return __RV_SMALXDA(acc, op1, op2);
  24254. }
  24255. /** \brief Q setting sum of dual 16-bit signed multiply. */
  24256. __STATIC_FORCEINLINE long __SMUAD(unsigned long op1, unsigned long op2)
  24257. {
  24258. return __RV_KMDA(op1, op2);
  24259. }
  24260. /** \brief Dual 16-bit signed multiply returning difference. */
  24261. __STATIC_FORCEINLINE long __SMUSD(unsigned long op1, unsigned long op2)
  24262. {
  24263. return __RV_SMDRS(op1, op2);
  24264. }
  24265. /** \brief Dual extract 8-bits and sign extend each to 16-bits. */
  24266. #define __SXTB16(x) __RV_SUNPKD820(x)
  24267. /** \brief Dual extracted 8-bit to 16-bit signed addition. TODO Need test */
  24268. __STATIC_FORCEINLINE unsigned long __SXTAB16(unsigned long op1, unsigned long op2)
  24269. {
  24270. return __RV_ADD16(op1, __RV_SUNPKD820(op2));
  24271. }
  24272. #define __SXTAB16_RORn(ARG1, ARG2, ROTATE) __SXTAB16(ARG1, __ROR(ARG2, ROTATE))
  24273. /** \brief 32-bit signed multiply with 32-bit truncated accumulator. */
  24274. __STATIC_FORCEINLINE long __SMMLA(long op1, long op2, long acc)
  24275. {
  24276. long mul;
  24277. mul = __RV_SMMUL(op1, op2);
  24278. return (acc + mul);
  24279. }
  24280. #define __DKHM8 __RV_DKHM8
  24281. #define __DKHM16 __RV_DKHM16
  24282. #define __DKSUB16 __RV_DKSUB16
  24283. #define __SMAQA __RV_SMAQA
  24284. #define __MULSR64 __RV_MULSR64
  24285. #define __DQADD8 __RV_DKADD8
  24286. #define __DQSUB8 __RV_DKSUB8
  24287. #define __DKADD16 __RV_DKADD16
  24288. #define __PKBB16 __RV_PKBB16
  24289. #define __DKSLRA16 __RV_DKSLRA16
  24290. #define __DKSLRA8 __RV_DKSLRA8
  24291. #define __KABSW __RV_KABSW
  24292. #define __DKABS8 __RV_DKABS8
  24293. #define __DKABS16 __RV_DKABS16
  24294. #define __SMALDA __RV_SMALDA
  24295. #define __SMSLDA __RV_SMSLDA
  24296. #define __SMALBB __RV_SMALBB
  24297. #define __SUB64 __RV_SUB64
  24298. #define __ADD64 __RV_ADD64
  24299. #define __SMBB16 __RV_SMBB16
  24300. #define __SMBT16 __RV_SMBT16
  24301. #define __SMTT16 __RV_SMTT16
  24302. #define __EXPD80 __RV_EXPD80
  24303. #define __SMAX8 __RV_SMAX8
  24304. #define __SMAX16 __RV_SMAX16
  24305. #define __PKTT16 __RV_PKTT16
  24306. #define __KADD16 __RV_KADD16
  24307. #define __SADD16 __RV_ADD16
  24308. #define __SSUB8 __RV_KSUB8
  24309. #define __SADD8 __RV_KADD8
  24310. #define __USAT16 __RV_UCLIP16
  24311. #define __SMALTT __RV_SMALTT
  24312. /** \brief Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16] of val2 levitated with the val3. */
  24313. #define __PKHBT(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG2, ARG1) : \
  24314. (ARG3 == 16) ? __RV_PKBB16(ARG2, ARG1) : \
  24315. (((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \
  24316. ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)))
  24317. /** \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted with the val3. */
  24318. #define __PKHTB(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG1, ARG2) : \
  24319. (ARG3 == 16) ? __RV_PKTT16(ARG1, ARG2) : \
  24320. (((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \
  24321. ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)))
  24322. #if __RISCV_XLEN == 64
  24323. /** \brief Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16] of val2 levitated with the val3,
  24324. and also combines the [47:32] of val1 with bits[63:48] of val2 with the val3, finally pack the two new 32-bits to 64-bit. */
  24325. #define __PKHBT64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG2, ARG1) : \
  24326. (ARG3 == 16) ? __RV_PKBB16(ARG2, ARG1) : \
  24327. ((int64_t)((((uint32_t)((uint64_t)ARG1 >> 32)) & 0x0000FFFFUL) | \
  24328. ((((uint32_t)((uint64_t)ARG2 >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \
  24329. ((int64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) | \
  24330. ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL))
  24331. /** \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted with the val3,
  24332. and also combines bits [63:48] of val1 with bits [47:32] of val2 with the val3, finally pack the two new 32bits to 64bits. */
  24333. #define __PKHTB64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_PKTB16(ARG1, ARG2) : \
  24334. (ARG3 == 16) ? __RV_PKTT16(ARG1, ARG2) : \
  24335. ((uint64_t)(((uint32_t)((uint64_t)ARG1 >> 32) & 0xFFFF0000UL) | \
  24336. ((((uint32_t)((uint64_t)ARG2 >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) | \
  24337. ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) | \
  24338. ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL))
  24339. #else
  24340. /** \brief Halfword packing instruction. Combines bits[15:0] of val1 with bits[31:16] of val2 levitated with the val3,
  24341. and also combines the [47:32] of val1 with bits[63:48] of val2 with the val3, finally pack the two new 32-bits to 64-bit. */
  24342. #define __PKHBT64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_DPKTB16(ARG2, ARG1) : \
  24343. (ARG3 == 16) ? __RV_DPKBB16(ARG2, ARG1) : \
  24344. ((int64_t)((((uint32_t)((uint64_t)ARG1 >> 32)) & 0x0000FFFFUL) | \
  24345. ((((uint32_t)((uint64_t)ARG2 >> 32)) << (ARG3)) & 0xFFFF0000UL)) << 32) | \
  24346. ((int64_t)(((((uint32_t)(ARG1))) & 0x0000FFFFUL) | \
  24347. ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)) & 0xFFFFFFFFUL))
  24348. /** \brief Halfword packing instruction. Combines bits[31:16] of val1 with bits[15:0] of val2 right-shifted with the val3,
  24349. and also combines bits [63:48] of val1 with bits [47:32] of val2 with the val3, finally pack the two new 32bits to 64bits. */
  24350. #define __PKHTB64(ARG1, ARG2, ARG3) ((ARG3 == 0) ? __RV_DPKTB16(ARG1, ARG2) : \
  24351. (ARG3 == 16) ? __RV_DPKTT16(ARG1, ARG2) : \
  24352. ((uint64_t)(((uint32_t)((uint64_t)ARG1 >> 32) & 0xFFFF0000UL) | \
  24353. ((((uint32_t)((uint64_t)ARG2 >> 32)) >> (ARG3)) & 0x0000FFFFUL)) << 32) | \
  24354. ((uint64_t)(((uint32_t)(ARG1) & 0xFFFF0000UL) | \
  24355. ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)) & 0xFFFFFFFFUL))
  24356. #endif /* __RISCV_XLEN == 64 */
  24357. /** first rotate then extract. This is more suitable for arm compiler for it can rotate and extract in one command*/
  24358. #define __SXTB16_RORn(ARG1, ARG2) __RV_SUNPKD820(__ROR(ARG1, ARG2))
  24359. #endif /* defined(__DSP_PRESENT) && (__DSP_PRESENT == 1) */
  24360. #ifdef __cplusplus
  24361. }
  24362. #endif
  24363. #endif /* __CORE_FEATURE_DSP__ */