none.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. /******************************************************************************
  2. * @file none.h
* @brief Intrinsics when no DSP extension available
  4. * @version V1.9.0
  5. * @date 20. July 2020
  6. ******************************************************************************/
  7. /*
  8. * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
  9. * Copyright (c) 2019 Nuclei Limited. All rights reserved.
  10. *
  11. * SPDX-License-Identifier: Apache-2.0
  12. *
  13. * Licensed under the Apache License, Version 2.0 (the License); you may
  14. * not use this file except in compliance with the License.
  15. * You may obtain a copy of the License at
  16. *
  17. * www.apache.org/licenses/LICENSE-2.0
  18. *
  19. * Unless required by applicable law or agreed to in writing, software
  20. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  21. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22. * See the License for the specific language governing permissions and
  23. * limitations under the License.
  24. */
  25. /*
  26. Definitions in this file are allowing to reuse some versions of the
  27. NMSIS-DSP to build on a core (M0 for instance) or a host where
  28. DSP extension are not available.
  29. Ideally a pure C version should have been used instead.
  30. But those are not always available or use a restricted set
  31. of intrinsics.
  32. */
  33. #ifndef NONE_H_
  34. #define NONE_H_
  35. #include "riscv_math_types.h"
  36. #ifdef __cplusplus
  37. extern "C"
  38. {
  39. #endif
  40. /*
  41. Normally those kind of definitions are in a compiler file
  42. in Core or Core_A.
  43. But for MSVC compiler it is a bit special. The goal is very specific
  44. to NMSIS-DSP and only to allow the use of this library from other
  45. systems like Python or Matlab.
  46. MSVC is not going to be used to cross-compile to RISCV. So, having a MSVC
  47. compiler file in Core or Core_A would not make sense.
  48. */
  49. #if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__)
  50. __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
  51. {
  52. if (data == 0U) { return 32U; }
  53. uint32_t count = 0U;
  54. uint32_t mask = 0x80000000U;
  55. while ((data & mask) == 0U)
  56. {
  57. count += 1U;
  58. mask = mask >> 1U;
  59. }
  60. return count;
  61. }
  62. __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
  63. {
  64. if ((sat >= 1U) && (sat <= 32U))
  65. {
  66. const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
  67. const int32_t min = -1 - max ;
  68. if (val > max)
  69. {
  70. return max;
  71. }
  72. else if (val < min)
  73. {
  74. return min;
  75. }
  76. }
  77. return val;
  78. }
  79. __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
  80. {
  81. if (sat <= 31U)
  82. {
  83. const uint32_t max = ((1U << sat) - 1U);
  84. if (val > (int32_t)max)
  85. {
  86. return max;
  87. }
  88. else if (val < 0)
  89. {
  90. return 0U;
  91. }
  92. }
  93. return (uint32_t)val;
  94. }
  95. /**
  96. \brief Rotate Right in unsigned value (32 bit)
  97. \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
  98. \param [in] op1 Value to rotate
  99. \param [in] op2 Number of Bits to rotate
  100. \return Rotated value
  101. */
  102. __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
  103. {
  104. op2 %= 32U;
  105. if (op2 == 0U)
  106. {
  107. return op1;
  108. }
  109. return (op1 >> op2) | (op1 << (32U - op2));
  110. }
  111. #endif
  112. /**
  113. * @brief Clips Q63 to Q31 values.
  114. */
  115. __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
  116. q63_t x)
  117. {
  118. return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
  119. ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
  120. }
  121. /**
  122. * @brief Clips Q63 to Q15 values.
  123. */
  124. __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
  125. q63_t x)
  126. {
  127. return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
  128. ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
  129. }
  130. /**
  131. * @brief Clips Q31 to Q7 values.
  132. */
  133. __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
  134. q31_t x)
  135. {
  136. return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
  137. ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
  138. }
  139. /**
  140. * @brief Clips Q31 to Q15 values.
  141. */
  142. __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
  143. q31_t x)
  144. {
  145. return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
  146. ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
  147. }
  148. /**
  149. * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
  150. */
  151. __STATIC_FORCEINLINE q63_t mult32x64(
  152. q63_t x,
  153. q31_t y)
  154. {
  155. return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
  156. (((q63_t) (x >> 32) * y) ) );
  157. }
  158. /* SMMLAR */
  159. #define multAcc_32x32_keep32_R(a, x, y) \
  160. a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32)
  161. /* SMMLSR */
  162. #define multSub_32x32_keep32_R(a, x, y) \
  163. a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32)
  164. /* SMMULR */
  165. #define mult_32x32_keep32_R(a, x, y) \
  166. a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32)
  167. /* SMMLA */
  168. #define multAcc_32x32_keep32(a, x, y) \
  169. a += (q31_t) (((q63_t) x * y) >> 32)
  170. /* SMMLS */
  171. #define multSub_32x32_keep32(a, x, y) \
  172. a -= (q31_t) (((q63_t) x * y) >> 32)
  173. /* SMMUL */
  174. #define mult_32x32_keep32(a, x, y) \
  175. a = (q31_t) (((q63_t) x * y ) >> 32)
  176. #ifndef RISCV_MATH_DSP
  177. /**
  178. * @brief definition to pack two 16 bit values.
  179. */
  180. #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \
  181. (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) )
  182. #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \
  183. (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) )
  184. #endif
  185. /**
  186. * @brief definition to pack four 8 bit values.
  187. */
  188. #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \
  189. (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \
  190. (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
  191. (((int32_t)(v3) << 24) & (int32_t)0xFF000000) )
  192. /*
  193. * @brief C custom defined intrinsic functions
  194. */
  195. #if !defined (RISCV_MATH_DSP)
  196. /*
  197. * @brief C custom defined QADD8
  198. */
  199. __STATIC_FORCEINLINE uint32_t __QADD8(
  200. uint32_t x,
  201. uint32_t y)
  202. {
  203. q31_t r, s, t, u;
  204. r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
  205. s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
  206. t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
  207. u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
  208. return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r )));
  209. }
  210. /*
  211. * @brief C custom defined QSUB8
  212. */
  213. __STATIC_FORCEINLINE uint32_t __QSUB8(
  214. uint32_t x,
  215. uint32_t y)
  216. {
  217. q31_t r, s, t, u;
  218. r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
  219. s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
  220. t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF;
  221. u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF;
  222. return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r )));
  223. }
  224. /*
  225. * @brief C custom defined QADD16
  226. */
  227. __STATIC_FORCEINLINE uint32_t __QADD16(
  228. uint32_t x,
  229. uint32_t y)
  230. {
  231. /* q31_t r, s; without initialisation 'riscv_offset_q15 test' fails but 'intrinsic' tests pass! */
  232. q31_t r = 0, s = 0;
  233. r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  234. s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  235. return ((uint32_t)((s << 16) | (r )));
  236. }
  237. /*
  238. * @brief C custom defined SHADD16
  239. */
  240. __STATIC_FORCEINLINE uint32_t __SHADD16(
  241. uint32_t x,
  242. uint32_t y)
  243. {
  244. q31_t r, s;
  245. r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  246. s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  247. return ((uint32_t)((s << 16) | (r )));
  248. }
  249. /*
  250. * @brief C custom defined QSUB16
  251. */
  252. __STATIC_FORCEINLINE uint32_t __QSUB16(
  253. uint32_t x,
  254. uint32_t y)
  255. {
  256. q31_t r, s;
  257. r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  258. s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  259. return ((uint32_t)((s << 16) | (r )));
  260. }
  261. /*
  262. * @brief C custom defined SHSUB16
  263. */
  264. __STATIC_FORCEINLINE uint32_t __SHSUB16(
  265. uint32_t x,
  266. uint32_t y)
  267. {
  268. q31_t r, s;
  269. r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  270. s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  271. return ((uint32_t)((s << 16) | (r )));
  272. }
  273. /*
  274. * @brief C custom defined QASX
  275. */
  276. __STATIC_FORCEINLINE uint32_t __QASX(
  277. uint32_t x,
  278. uint32_t y)
  279. {
  280. q31_t r, s;
  281. r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  282. s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  283. return ((uint32_t)((s << 16) | (r )));
  284. }
  285. /*
  286. * @brief C custom defined SHASX
  287. */
  288. __STATIC_FORCEINLINE uint32_t __SHASX(
  289. uint32_t x,
  290. uint32_t y)
  291. {
  292. q31_t r, s;
  293. r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  294. s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  295. return ((uint32_t)((s << 16) | (r )));
  296. }
  297. /*
  298. * @brief C custom defined QSAX
  299. */
  300. __STATIC_FORCEINLINE uint32_t __QSAX(
  301. uint32_t x,
  302. uint32_t y)
  303. {
  304. q31_t r, s;
  305. r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF;
  306. s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
  307. return ((uint32_t)((s << 16) | (r )));
  308. }
  309. /*
  310. * @brief C custom defined SHSAX
  311. */
  312. __STATIC_FORCEINLINE uint32_t __SHSAX(
  313. uint32_t x,
  314. uint32_t y)
  315. {
  316. q31_t r, s;
  317. r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  318. s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
  319. return ((uint32_t)((s << 16) | (r )));
  320. }
  321. /*
  322. * @brief C custom defined SMUSDX
  323. */
  324. __STATIC_FORCEINLINE uint32_t __SMUSDX(
  325. uint32_t x,
  326. uint32_t y)
  327. {
  328. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) -
  329. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
  330. }
  331. /*
  332. * @brief C custom defined SMUADX
  333. */
  334. __STATIC_FORCEINLINE uint32_t __SMUADX(
  335. uint32_t x,
  336. uint32_t y)
  337. {
  338. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
  339. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) ));
  340. }
  341. /*
  342. * @brief C custom defined QADD
  343. */
  344. __STATIC_FORCEINLINE int32_t __QADD(
  345. int32_t x,
  346. int32_t y)
  347. {
  348. return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
  349. }
  350. /*
  351. * @brief C custom defined QSUB
  352. */
  353. __STATIC_FORCEINLINE int32_t __QSUB(
  354. int32_t x,
  355. int32_t y)
  356. {
  357. return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
  358. }
  359. /*
  360. * @brief C custom defined SMLAD
  361. */
  362. __STATIC_FORCEINLINE uint32_t __SMLAD(
  363. uint32_t x,
  364. uint32_t y,
  365. uint32_t sum)
  366. {
  367. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
  368. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) +
  369. ( ((q31_t)sum ) ) ));
  370. }
  371. /*
  372. * @brief C custom defined SMLADX
  373. */
  374. __STATIC_FORCEINLINE uint32_t __SMLADX(
  375. uint32_t x,
  376. uint32_t y,
  377. uint32_t sum)
  378. {
  379. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
  380. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
  381. ( ((q31_t)sum ) ) ));
  382. }
  383. /*
  384. * @brief C custom defined SMLSDX
  385. */
  386. __STATIC_FORCEINLINE uint32_t __SMLSDX(
  387. uint32_t x,
  388. uint32_t y,
  389. uint32_t sum)
  390. {
  391. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) -
  392. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
  393. ( ((q31_t)sum ) ) ));
  394. }
  395. /*
  396. * @brief C custom defined SMLALD
  397. */
  398. __STATIC_FORCEINLINE uint64_t __SMLALD(
  399. uint32_t x,
  400. uint32_t y,
  401. uint64_t sum)
  402. {
  403. /* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
  404. return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
  405. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) +
  406. ( ((q63_t)sum ) ) ));
  407. }
  408. /*
  409. * @brief C custom defined SMLALDX
  410. */
  411. __STATIC_FORCEINLINE uint64_t __SMLALDX(
  412. uint32_t x,
  413. uint32_t y,
  414. uint64_t sum)
  415. {
  416. /* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
  417. return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) +
  418. ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) +
  419. ( ((q63_t)sum ) ) ));
  420. }
  421. /*
  422. * @brief C custom defined SMUAD
  423. */
  424. __STATIC_FORCEINLINE uint32_t __SMUAD(
  425. uint32_t x,
  426. uint32_t y)
  427. {
  428. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
  429. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) ));
  430. }
  431. /*
  432. * @brief C custom defined SMUSD
  433. */
  434. __STATIC_FORCEINLINE uint32_t __SMUSD(
  435. uint32_t x,
  436. uint32_t y)
  437. {
  438. return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
  439. ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) ));
  440. }
  441. /*
  442. * @brief C custom defined SXTB16
  443. */
  444. __STATIC_FORCEINLINE uint32_t __SXTB16(
  445. uint32_t x)
  446. {
  447. return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
  448. ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) ));
  449. }
  450. /*
  451. * @brief C custom defined SMMLA
  452. */
  453. __STATIC_FORCEINLINE int32_t __SMMLA(
  454. int32_t x,
  455. int32_t y,
  456. int32_t sum)
  457. {
  458. return (sum + (int32_t) (((int64_t) x * y) >> 32));
  459. }
  460. #endif /* !defined (RISCV_MATH_DSP) */
  461. #if !(defined (RISCV_MATH_DSP) && defined(NUCLEI_DSP_DEFAULT))
  462. /* __EXPD_BYTE defined in core_compatiable.h from NMSIS 1.2.0 */
  463. #define __EXPD80(x) __EXPD_BYTE((uint8_t)((x) & 0xff))
  464. #endif /* !(defined (RISCV_MATH_DSP) && defined(NUCLEI_DSP_DEFAULT)) */
  465. #ifdef __cplusplus
  466. }
  467. #endif
#endif /* ifndef NONE_H_ */