arm_cfft_radix4_f16.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix4_f16.c
  4. * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/transform_functions_f16.h"
  29. #if defined(ARM_FLOAT16_SUPPORTED)
  30. extern void arm_bitreversal_f16(
  31. float16_t * pSrc,
  32. uint16_t fftSize,
  33. uint16_t bitRevFactor,
  34. const uint16_t * pBitRevTab);
  35. void arm_radix4_butterfly_f16(
  36. float16_t * pSrc,
  37. uint16_t fftLen,
  38. const float16_t * pCoef,
  39. uint16_t twidCoefModifier);
  40. void arm_radix4_butterfly_inverse_f16(
  41. float16_t * pSrc,
  42. uint16_t fftLen,
  43. const float16_t * pCoef,
  44. uint16_t twidCoefModifier,
  45. float16_t onebyfftLen);
  46. void arm_cfft_radix4by2_f16(
  47. float16_t * pSrc,
  48. uint32_t fftLen,
  49. const float16_t * pCoef);
  50. /**
  51. @addtogroup ComplexFFTDeprecated
  52. @{
  53. */
  54. /*
  55. * @brief Core function for the floating-point CFFT butterfly process.
  56. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  57. * @param[in] fftLen length of the FFT.
  58. * @param[in] *pCoef points to the twiddle coefficient buffer.
  59. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  60. */
  61. void arm_cfft_radix4by2_f16(
  62. float16_t * pSrc,
  63. uint32_t fftLen,
  64. const float16_t * pCoef)
  65. {
  66. uint32_t i, l;
  67. uint32_t n2, ia;
  68. float16_t xt, yt, cosVal, sinVal;
  69. float16_t p0, p1,p2,p3,a0,a1;
  70. n2 = fftLen >> 1;
  71. ia = 0;
  72. for (i = 0; i < n2; i++)
  73. {
  74. cosVal = pCoef[2*ia];
  75. sinVal = pCoef[2*ia + 1];
  76. ia++;
  77. l = i + n2;
  78. /* Butterfly implementation */
  79. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  80. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  81. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  82. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  83. p0 = (_Float16)xt * (_Float16)cosVal;
  84. p1 = (_Float16)yt * (_Float16)sinVal;
  85. p2 = (_Float16)yt * (_Float16)cosVal;
  86. p3 = (_Float16)xt * (_Float16)sinVal;
  87. pSrc[2 * i] = a0;
  88. pSrc[2 * i + 1] = a1;
  89. pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
  90. pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
  91. }
  92. // first col
  93. arm_radix4_butterfly_f16( pSrc, n2, (float16_t*)pCoef, 2U);
  94. // second col
  95. arm_radix4_butterfly_f16( pSrc + fftLen, n2, (float16_t*)pCoef, 2U);
  96. }
  97. /**
  98. @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
  99. @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
  100. @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
  101. @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  102. */
  103. void arm_cfft_radix4_f16(
  104. const arm_cfft_radix4_instance_f16 * S,
  105. float16_t * pSrc)
  106. {
  107. if (S->ifftFlag == 1U)
  108. {
  109. /* Complex IFFT radix-4 */
  110. arm_radix4_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  111. }
  112. else
  113. {
  114. /* Complex FFT radix-4 */
  115. arm_radix4_butterfly_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  116. }
  117. if (S->bitReverseFlag == 1U)
  118. {
  119. /* Bit Reversal */
  120. arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  121. }
  122. }
  123. /**
  124. @} end of ComplexFFTDeprecated group
  125. */
  126. /* ----------------------------------------------------------------------
  127. * Internal helper function used by the FFTs
  128. * ---------------------------------------------------------------------- */
  129. /*
  130. * @brief Core function for the floating-point CFFT butterfly process.
  131. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  132. * @param[in] fftLen length of the FFT.
  133. * @param[in] *pCoef points to the twiddle coefficient buffer.
  134. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  135. */
  136. void arm_radix4_butterfly_f16(
  137. float16_t * pSrc,
  138. uint16_t fftLen,
  139. const float16_t * pCoef,
  140. uint16_t twidCoefModifier)
  141. {
  142. float16_t co1, co2, co3, si1, si2, si3;
  143. uint32_t ia1, ia2, ia3;
  144. uint32_t i0, i1, i2, i3;
  145. uint32_t n1, n2, j, k;
  146. #if defined (ARM_MATH_DSP)
  147. /* Run the below code for Cortex-M4 and Cortex-M3 */
  148. float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  149. float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  150. Ybminusd;
  151. float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  152. float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  153. float16_t *ptr1;
  154. float16_t p0,p1,p2,p3,p4,p5;
  155. float16_t a0,a1,a2,a3,a4,a5,a6,a7;
  156. /* Initializations for the first stage */
  157. n2 = fftLen;
  158. n1 = n2;
  159. /* n2 = fftLen/4 */
  160. n2 >>= 2U;
  161. i0 = 0U;
  162. ia1 = 0U;
  163. j = n2;
  164. /* Calculation of first stage */
  165. do
  166. {
  167. /* index calculation for the input as, */
  168. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  169. i1 = i0 + n2;
  170. i2 = i1 + n2;
  171. i3 = i2 + n2;
  172. xaIn = pSrc[(2U * i0)];
  173. yaIn = pSrc[(2U * i0) + 1U];
  174. xbIn = pSrc[(2U * i1)];
  175. ybIn = pSrc[(2U * i1) + 1U];
  176. xcIn = pSrc[(2U * i2)];
  177. ycIn = pSrc[(2U * i2) + 1U];
  178. xdIn = pSrc[(2U * i3)];
  179. ydIn = pSrc[(2U * i3) + 1U];
  180. /* xa + xc */
  181. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  182. /* xb + xd */
  183. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  184. /* ya + yc */
  185. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  186. /* yb + yd */
  187. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  188. /* index calculation for the coefficients */
  189. ia2 = ia1 + ia1;
  190. co2 = pCoef[ia2 * 2U];
  191. si2 = pCoef[(ia2 * 2U) + 1U];
  192. /* xa - xc */
  193. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  194. /* xb - xd */
  195. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  196. /* ya - yc */
  197. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  198. /* yb - yd */
  199. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  200. /* xa' = xa + xb + xc + xd */
  201. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  202. /* ya' = ya + yb + yc + yd */
  203. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  204. /* (xa - xc) + (yb - yd) */
  205. Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  206. /* (ya - yc) + (xb - xd) */
  207. Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  208. /* (xa + xc) - (xb + xd) */
  209. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  210. /* (ya + yc) - (yb + yd) */
  211. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  212. /* (xa - xc) - (yb - yd) */
  213. Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  214. /* (ya - yc) + (xb - xd) */
  215. Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
  216. co1 = pCoef[ia1 * 2U];
  217. si1 = pCoef[(ia1 * 2U) + 1U];
  218. /* index calculation for the coefficients */
  219. ia3 = ia2 + ia1;
  220. co3 = pCoef[ia3 * 2U];
  221. si3 = pCoef[(ia3 * 2U) + 1U];
  222. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  223. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  224. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  225. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  226. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  227. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  228. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  229. //Xb12_out -= Yb12C_out * si1;
  230. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  231. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  232. //Yb12_out += Xb12C_out * si1;
  233. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  234. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  235. //Xc12_out -= Yc12C_out * si2;
  236. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  237. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  238. //Yc12_out += Xc12C_out * si2;
  239. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  240. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  241. //Xd12_out -= Yd12C_out * si3;
  242. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  243. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  244. //Yd12_out += Xd12C_out * si3;
  245. p5 = (_Float16)Xd12C_out * (_Float16)si3;
  246. Xb12_out += (_Float16)p0;
  247. Yb12_out -= (_Float16)p1;
  248. Xc12_out += (_Float16)p2;
  249. Yc12_out -= (_Float16)p3;
  250. Xd12_out += (_Float16)p4;
  251. Yd12_out -= (_Float16)p5;
  252. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  253. pSrc[2U * i1] = Xc12_out;
  254. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  255. pSrc[(2U * i1) + 1U] = Yc12_out;
  256. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  257. pSrc[2U * i2] = Xb12_out;
  258. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  259. pSrc[(2U * i2) + 1U] = Yb12_out;
  260. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  261. pSrc[2U * i3] = Xd12_out;
  262. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  263. pSrc[(2U * i3) + 1U] = Yd12_out;
  264. /* Twiddle coefficients index modifier */
  265. ia1 += twidCoefModifier;
  266. /* Updating input index */
  267. i0++;
  268. }
  269. while (--j);
  270. twidCoefModifier <<= 2U;
  271. /* Calculation of second stage to excluding last stage */
  272. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  273. {
  274. /* Initializations for the first stage */
  275. n1 = n2;
  276. n2 >>= 2U;
  277. ia1 = 0U;
  278. /* Calculation of first stage */
  279. j = 0;
  280. do
  281. {
  282. /* index calculation for the coefficients */
  283. ia2 = ia1 + ia1;
  284. ia3 = ia2 + ia1;
  285. co1 = pCoef[ia1 * 2U];
  286. si1 = pCoef[(ia1 * 2U) + 1U];
  287. co2 = pCoef[ia2 * 2U];
  288. si2 = pCoef[(ia2 * 2U) + 1U];
  289. co3 = pCoef[ia3 * 2U];
  290. si3 = pCoef[(ia3 * 2U) + 1U];
  291. /* Twiddle coefficients index modifier */
  292. ia1 += twidCoefModifier;
  293. i0 = j;
  294. do
  295. {
  296. /* index calculation for the input as, */
  297. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  298. i1 = i0 + n2;
  299. i2 = i1 + n2;
  300. i3 = i2 + n2;
  301. xaIn = pSrc[(2U * i0)];
  302. yaIn = pSrc[(2U * i0) + 1U];
  303. xbIn = pSrc[(2U * i1)];
  304. ybIn = pSrc[(2U * i1) + 1U];
  305. xcIn = pSrc[(2U * i2)];
  306. ycIn = pSrc[(2U * i2) + 1U];
  307. xdIn = pSrc[(2U * i3)];
  308. ydIn = pSrc[(2U * i3) + 1U];
  309. /* xa - xc */
  310. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  311. /* (xb - xd) */
  312. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  313. /* ya - yc */
  314. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  315. /* (yb - yd) */
  316. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  317. /* xa + xc */
  318. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  319. /* xb + xd */
  320. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  321. /* ya + yc */
  322. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  323. /* yb + yd */
  324. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  325. /* (xa - xc) + (yb - yd) */
  326. Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  327. /* (ya - yc) - (xb - xd) */
  328. Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  329. /* xa + xc -(xb + xd) */
  330. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  331. /* (ya + yc) - (yb + yd) */
  332. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  333. /* (xa - xc) - (yb - yd) */
  334. Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  335. /* (ya - yc) + (xb - xd) */
  336. Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
  337. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  338. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  339. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  340. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  341. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  342. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  343. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  344. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  345. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  346. //Xb12_out -= Yb12C_out * si1;
  347. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  348. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  349. //Yb12_out += Xb12C_out * si1;
  350. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  351. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  352. //Xc12_out -= Yc12C_out * si2;
  353. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  354. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  355. //Yc12_out += Xc12C_out * si2;
  356. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  357. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  358. //Xd12_out -= Yd12C_out * si3;
  359. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  360. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  361. //Yd12_out += Xd12C_out * si3;
  362. p5 = (_Float16)Xd12C_out * (_Float16)si3;
  363. Xb12_out += (_Float16)p0;
  364. Yb12_out -= (_Float16)p1;
  365. Xc12_out += (_Float16)p2;
  366. Yc12_out -= (_Float16)p3;
  367. Xd12_out += (_Float16)p4;
  368. Yd12_out -= (_Float16)p5;
  369. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  370. pSrc[2U * i1] = Xc12_out;
  371. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  372. pSrc[(2U * i1) + 1U] = Yc12_out;
  373. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  374. pSrc[2U * i2] = Xb12_out;
  375. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  376. pSrc[(2U * i2) + 1U] = Yb12_out;
  377. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  378. pSrc[2U * i3] = Xd12_out;
  379. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  380. pSrc[(2U * i3) + 1U] = Yd12_out;
  381. i0 += n1;
  382. } while (i0 < fftLen);
  383. j++;
  384. } while (j <= (n2 - 1U));
  385. twidCoefModifier <<= 2U;
  386. }
  387. j = fftLen >> 2;
  388. ptr1 = &pSrc[0];
  389. /* Calculations of last stage */
  390. do
  391. {
  392. xaIn = ptr1[0];
  393. yaIn = ptr1[1];
  394. xbIn = ptr1[2];
  395. ybIn = ptr1[3];
  396. xcIn = ptr1[4];
  397. ycIn = ptr1[5];
  398. xdIn = ptr1[6];
  399. ydIn = ptr1[7];
  400. /* xa + xc */
  401. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  402. /* xa - xc */
  403. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  404. /* ya + yc */
  405. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  406. /* ya - yc */
  407. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  408. /* xb + xd */
  409. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  410. /* yb + yd */
  411. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  412. /* (xb-xd) */
  413. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  414. /* (yb-yd) */
  415. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  416. /* xa' = xa + xb + xc + xd */
  417. a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
  418. /* ya' = ya + yb + yc + yd */
  419. a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
  420. /* xc' = (xa-xb+xc-xd) */
  421. a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  422. /* yc' = (ya-yb+yc-yd) */
  423. a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  424. /* xb' = (xa+yb-xc-yd) */
  425. a4 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  426. /* yb' = (ya-xb-yc+xd) */
  427. a5 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  428. /* xd' = (xa-yb-xc+yd)) */
  429. a6 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  430. /* yd' = (ya+xb-yc-xd) */
  431. a7 = ((_Float16)Xbminusd + (_Float16)Yaminusc);
  432. ptr1[0] = a0;
  433. ptr1[1] = a1;
  434. ptr1[2] = a2;
  435. ptr1[3] = a3;
  436. ptr1[4] = a4;
  437. ptr1[5] = a5;
  438. ptr1[6] = a6;
  439. ptr1[7] = a7;
  440. /* increment pointer by 8 */
  441. ptr1 += 8U;
  442. } while (--j);
  443. #else
  444. float16_t t1, t2, r1, r2, s1, s2;
  445. /* Run the below code for Cortex-M0 */
  446. /* Initializations for the fft calculation */
  447. n2 = fftLen;
  448. n1 = n2;
  449. for (k = fftLen; k > 1U; k >>= 2U)
  450. {
  451. /* Initializations for the fft calculation */
  452. n1 = n2;
  453. n2 >>= 2U;
  454. ia1 = 0U;
  455. /* FFT Calculation */
  456. j = 0;
  457. do
  458. {
  459. /* index calculation for the coefficients */
  460. ia2 = ia1 + ia1;
  461. ia3 = ia2 + ia1;
  462. co1 = pCoef[ia1 * 2U];
  463. si1 = pCoef[(ia1 * 2U) + 1U];
  464. co2 = pCoef[ia2 * 2U];
  465. si2 = pCoef[(ia2 * 2U) + 1U];
  466. co3 = pCoef[ia3 * 2U];
  467. si3 = pCoef[(ia3 * 2U) + 1U];
  468. /* Twiddle coefficients index modifier */
  469. ia1 = ia1 + twidCoefModifier;
  470. i0 = j;
  471. do
  472. {
  473. /* index calculation for the input as, */
  474. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  475. i1 = i0 + n2;
  476. i2 = i1 + n2;
  477. i3 = i2 + n2;
  478. /* xa + xc */
  479. r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
  480. /* xa - xc */
  481. r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
  482. /* ya + yc */
  483. s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
  484. /* ya - yc */
  485. s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
  486. /* xb + xd */
  487. t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
  488. /* xa' = xa + xb + xc + xd */
  489. pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
  490. /* xa + xc -(xb + xd) */
  491. r1 = (_Float16)r1 - (_Float16)t1;
  492. /* yb + yd */
  493. t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
  494. /* ya' = ya + yb + yc + yd */
  495. pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
  496. /* (ya + yc) - (yb + yd) */
  497. s1 = (_Float16)s1 - (_Float16)t2;
  498. /* (yb - yd) */
  499. t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
  500. /* (xb - xd) */
  501. t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
  502. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  503. pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) + ((_Float16)s1 * (_Float16)si2);
  504. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  505. pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) - ((_Float16)r1 * (_Float16)si2);
  506. /* (xa - xc) + (yb - yd) */
  507. r1 = (_Float16)r2 + (_Float16)t1;
  508. /* (xa - xc) - (yb - yd) */
  509. r2 = (_Float16)r2 - (_Float16)t1;
  510. /* (ya - yc) - (xb - xd) */
  511. s1 = (_Float16)s2 - (_Float16)t2;
  512. /* (ya - yc) + (xb - xd) */
  513. s2 = (_Float16)s2 + (_Float16)t2;
  514. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  515. pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) + ((_Float16)s1 * (_Float16)si1);
  516. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  517. pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) - ((_Float16)r1 * (_Float16)si1);
  518. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  519. pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) + ((_Float16)s2 * (_Float16)si3);
  520. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  521. pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) - ((_Float16)r2 * (_Float16)si3);
  522. i0 += n1;
  523. } while ( i0 < fftLen);
  524. j++;
  525. } while (j <= (n2 - 1U));
  526. twidCoefModifier <<= 2U;
  527. }
  528. #endif /* #if defined (ARM_MATH_DSP) */
  529. }
  530. /*
  531. * @brief Core function for the floating-point CIFFT butterfly process.
  532. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  533. * @param[in] fftLen length of the FFT.
  534. * @param[in] *pCoef points to twiddle coefficient buffer.
  535. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  536. * @param[in] onebyfftLen value of 1/fftLen.
  537. */
  538. void arm_radix4_butterfly_inverse_f16(
  539. float16_t * pSrc,
  540. uint16_t fftLen,
  541. const float16_t * pCoef,
  542. uint16_t twidCoefModifier,
  543. float16_t onebyfftLen)
  544. {
  545. float16_t co1, co2, co3, si1, si2, si3;
  546. uint32_t ia1, ia2, ia3;
  547. uint32_t i0, i1, i2, i3;
  548. uint32_t n1, n2, j, k;
  549. #if defined (ARM_MATH_DSP)
  550. float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  551. float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  552. Ybminusd;
  553. float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  554. float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  555. float16_t *ptr1;
  556. float16_t p0,p1,p2,p3,p4,p5,p6,p7;
  557. float16_t a0,a1,a2,a3,a4,a5,a6,a7;
  558. /* Initializations for the first stage */
  559. n2 = fftLen;
  560. n1 = n2;
  561. /* n2 = fftLen/4 */
  562. n2 >>= 2U;
  563. i0 = 0U;
  564. ia1 = 0U;
  565. j = n2;
  566. /* Calculation of first stage */
  567. do
  568. {
  569. /* index calculation for the input as, */
  570. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  571. i1 = i0 + n2;
  572. i2 = i1 + n2;
  573. i3 = i2 + n2;
  574. /* Butterfly implementation */
  575. xaIn = pSrc[(2U * i0)];
  576. yaIn = pSrc[(2U * i0) + 1U];
  577. xcIn = pSrc[(2U * i2)];
  578. ycIn = pSrc[(2U * i2) + 1U];
  579. xbIn = pSrc[(2U * i1)];
  580. ybIn = pSrc[(2U * i1) + 1U];
  581. xdIn = pSrc[(2U * i3)];
  582. ydIn = pSrc[(2U * i3) + 1U];
  583. /* xa + xc */
  584. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  585. /* xb + xd */
  586. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  587. /* ya + yc */
  588. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  589. /* yb + yd */
  590. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  591. /* index calculation for the coefficients */
  592. ia2 = ia1 + ia1;
  593. co2 = pCoef[ia2 * 2U];
  594. si2 = pCoef[(ia2 * 2U) + 1U];
  595. /* xa - xc */
  596. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  597. /* xb - xd */
  598. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  599. /* ya - yc */
  600. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  601. /* yb - yd */
  602. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  603. /* xa' = xa + xb + xc + xd */
  604. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  605. /* ya' = ya + yb + yc + yd */
  606. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  607. /* (xa - xc) - (yb - yd) */
  608. Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  609. /* (ya - yc) + (xb - xd) */
  610. Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
  611. /* (xa + xc) - (xb + xd) */
  612. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  613. /* (ya + yc) - (yb + yd) */
  614. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  615. /* (xa - xc) + (yb - yd) */
  616. Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  617. /* (ya - yc) - (xb - xd) */
  618. Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  619. co1 = pCoef[ia1 * 2U];
  620. si1 = pCoef[(ia1 * 2U) + 1U];
  621. /* index calculation for the coefficients */
  622. ia3 = ia2 + ia1;
  623. co3 = pCoef[ia3 * 2U];
  624. si3 = pCoef[(ia3 * 2U) + 1U];
  625. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  626. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  627. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  628. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  629. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  630. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  631. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  632. //Xb12_out -= Yb12C_out * si1;
  633. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  634. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  635. //Yb12_out += Xb12C_out * si1;
  636. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  637. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  638. //Xc12_out -= Yc12C_out * si2;
  639. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  640. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  641. //Yc12_out += Xc12C_out * si2;
  642. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  643. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  644. //Xd12_out -= Yd12C_out * si3;
  645. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  646. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  647. //Yd12_out += Xd12C_out * si3;
  648. p5 =(_Float16) Xd12C_out * (_Float16)si3;
  649. Xb12_out -= (_Float16)p0;
  650. Yb12_out += (_Float16)p1;
  651. Xc12_out -= (_Float16)p2;
  652. Yc12_out += (_Float16)p3;
  653. Xd12_out -= (_Float16)p4;
  654. Yd12_out += (_Float16)p5;
  655. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  656. pSrc[2U * i1] = Xc12_out;
  657. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  658. pSrc[(2U * i1) + 1U] = Yc12_out;
  659. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  660. pSrc[2U * i2] = Xb12_out;
  661. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  662. pSrc[(2U * i2) + 1U] = Yb12_out;
  663. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  664. pSrc[2U * i3] = Xd12_out;
  665. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  666. pSrc[(2U * i3) + 1U] = Yd12_out;
  667. /* Twiddle coefficients index modifier */
  668. ia1 = ia1 + twidCoefModifier;
  669. /* Updating input index */
  670. i0 = i0 + 1U;
  671. } while (--j);
  672. twidCoefModifier <<= 2U;
  673. /* Calculation of second stage to excluding last stage */
  674. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  675. {
  676. /* Initializations for the first stage */
  677. n1 = n2;
  678. n2 >>= 2U;
  679. ia1 = 0U;
  680. /* Calculation of first stage */
  681. j = 0;
  682. do
  683. {
  684. /* index calculation for the coefficients */
  685. ia2 = ia1 + ia1;
  686. ia3 = ia2 + ia1;
  687. co1 = pCoef[ia1 * 2U];
  688. si1 = pCoef[(ia1 * 2U) + 1U];
  689. co2 = pCoef[ia2 * 2U];
  690. si2 = pCoef[(ia2 * 2U) + 1U];
  691. co3 = pCoef[ia3 * 2U];
  692. si3 = pCoef[(ia3 * 2U) + 1U];
  693. /* Twiddle coefficients index modifier */
  694. ia1 = ia1 + twidCoefModifier;
  695. i0 = j;
  696. do
  697. {
  698. /* index calculation for the input as, */
  699. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  700. i1 = i0 + n2;
  701. i2 = i1 + n2;
  702. i3 = i2 + n2;
  703. xaIn = pSrc[(2U * i0)];
  704. yaIn = pSrc[(2U * i0) + 1U];
  705. xbIn = pSrc[(2U * i1)];
  706. ybIn = pSrc[(2U * i1) + 1U];
  707. xcIn = pSrc[(2U * i2)];
  708. ycIn = pSrc[(2U * i2) + 1U];
  709. xdIn = pSrc[(2U * i3)];
  710. ydIn = pSrc[(2U * i3) + 1U];
  711. /* xa - xc */
  712. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  713. /* (xb - xd) */
  714. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  715. /* ya - yc */
  716. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  717. /* (yb - yd) */
  718. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  719. /* xa + xc */
  720. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  721. /* xb + xd */
  722. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  723. /* ya + yc */
  724. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  725. /* yb + yd */
  726. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  727. /* (xa - xc) - (yb - yd) */
  728. Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  729. /* (ya - yc) + (xb - xd) */
  730. Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
  731. /* xa + xc -(xb + xd) */
  732. Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  733. /* (ya + yc) - (yb + yd) */
  734. Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  735. /* (xa - xc) + (yb - yd) */
  736. Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  737. /* (ya - yc) - (xb - xd) */
  738. Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  739. pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
  740. pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
  741. Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
  742. Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
  743. Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
  744. Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
  745. Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
  746. Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
  747. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  748. //Xb12_out -= Yb12C_out * si1;
  749. p0 = (_Float16)Yb12C_out * (_Float16)si1;
  750. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  751. //Yb12_out += Xb12C_out * si1;
  752. p1 = (_Float16)Xb12C_out * (_Float16)si1;
  753. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  754. //Xc12_out -= Yc12C_out * si2;
  755. p2 = (_Float16)Yc12C_out * (_Float16)si2;
  756. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  757. //Yc12_out += Xc12C_out * si2;
  758. p3 = (_Float16)Xc12C_out * (_Float16)si2;
  759. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  760. //Xd12_out -= Yd12C_out * si3;
  761. p4 = (_Float16)Yd12C_out * (_Float16)si3;
  762. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  763. //Yd12_out += Xd12C_out * si3;
  764. p5 = (_Float16)Xd12C_out * (_Float16)si3;
  765. Xb12_out -= (_Float16)p0;
  766. Yb12_out += (_Float16)p1;
  767. Xc12_out -= (_Float16)p2;
  768. Yc12_out += (_Float16)p3;
  769. Xd12_out -= (_Float16)p4;
  770. Yd12_out += (_Float16)p5;
  771. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  772. pSrc[2U * i1] = Xc12_out;
  773. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  774. pSrc[(2U * i1) + 1U] = Yc12_out;
  775. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  776. pSrc[2U * i2] = Xb12_out;
  777. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  778. pSrc[(2U * i2) + 1U] = Yb12_out;
  779. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  780. pSrc[2U * i3] = Xd12_out;
  781. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  782. pSrc[(2U * i3) + 1U] = Yd12_out;
  783. i0 += n1;
  784. } while (i0 < fftLen);
  785. j++;
  786. } while (j <= (n2 - 1U));
  787. twidCoefModifier <<= 2U;
  788. }
  789. /* Initializations of last stage */
  790. j = fftLen >> 2;
  791. ptr1 = &pSrc[0];
  792. /* Calculations of last stage */
  793. do
  794. {
  795. xaIn = ptr1[0];
  796. yaIn = ptr1[1];
  797. xbIn = ptr1[2];
  798. ybIn = ptr1[3];
  799. xcIn = ptr1[4];
  800. ycIn = ptr1[5];
  801. xdIn = ptr1[6];
  802. ydIn = ptr1[7];
  803. /* Butterfly implementation */
  804. /* xa + xc */
  805. Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
  806. /* xa - xc */
  807. Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
  808. /* ya + yc */
  809. Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
  810. /* ya - yc */
  811. Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
  812. /* xb + xd */
  813. Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
  814. /* yb + yd */
  815. Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
  816. /* (xb-xd) */
  817. Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
  818. /* (yb-yd) */
  819. Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
  820. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  821. a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
  822. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  823. a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
  824. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  825. a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
  826. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  827. a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
  828. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  829. a4 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
  830. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  831. a5 = ((_Float16)Yaminusc + (_Float16)Xbminusd);
  832. /* xd' = (xa+yb-xc-yd) * onebyfftLen */
  833. a6 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
  834. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  835. a7 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
  836. p0 = (_Float16)a0 * (_Float16)onebyfftLen;
  837. p1 = (_Float16)a1 * (_Float16)onebyfftLen;
  838. p2 = (_Float16)a2 * (_Float16)onebyfftLen;
  839. p3 = (_Float16)a3 * (_Float16)onebyfftLen;
  840. p4 = (_Float16)a4 * (_Float16)onebyfftLen;
  841. p5 = (_Float16)a5 * (_Float16)onebyfftLen;
  842. p6 = (_Float16)a6 * (_Float16)onebyfftLen;
  843. p7 = (_Float16)a7 * (_Float16)onebyfftLen;
  844. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  845. ptr1[0] = p0;
  846. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  847. ptr1[1] = p1;
  848. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  849. ptr1[2] = p2;
  850. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  851. ptr1[3] = p3;
  852. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  853. ptr1[4] = p4;
  854. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  855. ptr1[5] = p5;
  856. /* xd' = (xa+yb-xc-yd) * onebyfftLen */
  857. ptr1[6] = p6;
  858. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  859. ptr1[7] = p7;
  860. /* increment source pointer by 8 for next calculations */
  861. ptr1 = ptr1 + 8U;
  862. } while (--j);
  863. #else
  864. float16_t t1, t2, r1, r2, s1, s2;
  865. /* Run the below code for Cortex-M0 */
  866. /* Initializations for the first stage */
  867. n2 = fftLen;
  868. n1 = n2;
  869. /* Calculation of all stages except the last */
  870. for (k = fftLen; k > 4U; k >>= 2U)
  871. {
  872. /* Initializations for the current stage */
  873. n1 = n2;
  874. n2 >>= 2U;
  875. ia1 = 0U;
  876. /* Calculation of the current stage */
  877. j = 0;
  878. do
  879. {
  880. /* index calculation for the coefficients */
  881. ia2 = ia1 + ia1;
  882. ia3 = ia2 + ia1;
  883. co1 = pCoef[ia1 * 2U];
  884. si1 = pCoef[(ia1 * 2U) + 1U];
  885. co2 = pCoef[ia2 * 2U];
  886. si2 = pCoef[(ia2 * 2U) + 1U];
  887. co3 = pCoef[ia3 * 2U];
  888. si3 = pCoef[(ia3 * 2U) + 1U];
  889. /* Twiddle coefficients index modifier */
  890. ia1 = ia1 + twidCoefModifier;
  891. i0 = j;
  892. do
  893. {
  894. /* index calculation for the input as, */
  895. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  896. i1 = i0 + n2;
  897. i2 = i1 + n2;
  898. i3 = i2 + n2;
  899. /* xa + xc */
  900. r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
  901. /* xa - xc */
  902. r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
  903. /* ya + yc */
  904. s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
  905. /* ya - yc */
  906. s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
  907. /* xb + xd */
  908. t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
  909. /* xa' = xa + xb + xc + xd */
  910. pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
  911. /* xa + xc -(xb + xd) */
  912. r1 = (_Float16)r1 - (_Float16)t1;
  913. /* yb + yd */
  914. t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
  915. /* ya' = ya + yb + yc + yd */
  916. pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
  917. /* (ya + yc) - (yb + yd) */
  918. s1 = (_Float16)s1 - (_Float16)t2;
  919. /* (yb - yd) */
  920. t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
  921. /* (xb - xd) */
  922. t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
  923. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  924. pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) - ((_Float16)s1 * (_Float16)si2);
  925. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  926. pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) + ((_Float16)r1 * (_Float16)si2);
  927. /* (xa - xc) - (yb - yd) */
  928. r1 = (_Float16)r2 - (_Float16)t1;
  929. /* (xa - xc) + (yb - yd) */
  930. r2 = (_Float16)r2 + (_Float16)t1;
  931. /* (ya - yc) + (xb - xd) */
  932. s1 = (_Float16)s2 + (_Float16)t2;
  933. /* (ya - yc) - (xb - xd) */
  934. s2 = (_Float16)s2 - (_Float16)t2;
  935. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  936. pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) - ((_Float16)s1 * (_Float16)si1);
  937. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  938. pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) + ((_Float16)r1 * (_Float16)si1);
  939. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  940. pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) - ((_Float16)s2 * (_Float16)si3);
  941. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  942. pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) + ((_Float16)r2 * (_Float16)si3);
  943. i0 += n1;
  944. } while ( i0 < fftLen);
  945. j++;
  946. } while (j <= (n2 - 1U));
  947. twidCoefModifier <<= 2U;
  948. }
  949. /* Initializations of last stage */
  950. n1 = n2;
  951. n2 >>= 2U;
  952. /* Calculations of last stage */
  953. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  954. {
  955. /* index calculation for the input as, */
  956. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  957. i1 = i0 + n2;
  958. i2 = i1 + n2;
  959. i3 = i2 + n2;
  960. /* Butterfly implementation */
  961. /* xa + xc */
  962. r1 = (_Float16)pSrc[2U * i0] + (_Float16)pSrc[2U * i2];
  963. /* xa - xc */
  964. r2 = (_Float16)pSrc[2U * i0] - (_Float16)pSrc[2U * i2];
  965. /* ya + yc */
  966. s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
  967. /* ya - yc */
  968. s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
  969. /* xb + xd */
  970. t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
  971. /* xa' = xa + xb + xc + xd */
  972. pSrc[2U * i0] = ((_Float16)r1 + (_Float16)t1) * (_Float16)onebyfftLen;
  973. /* (xa + xc) - (xb + xd) */
  974. r1 = (_Float16)r1 - (_Float16)t1;
  975. /* yb + yd */
  976. t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
  977. /* ya' = ya + yb + yc + yd */
  978. pSrc[(2U * i0) + 1U] = ((_Float16)s1 + (_Float16)t2) * (_Float16)onebyfftLen;
  979. /* (ya + yc) - (yb + yd) */
  980. s1 = (_Float16)s1 - (_Float16)t2;
  981. /* (yb-yd) */
  982. t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
  983. /* (xb-xd) */
  984. t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
  985. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  986. pSrc[2U * i1] = (_Float16)r1 * (_Float16)onebyfftLen;
  987. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  988. pSrc[(2U * i1) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
  989. /* (xa - xc) - (yb-yd) */
  990. r1 = (_Float16)r2 - (_Float16)t1;
  991. /* (xa - xc) + (yb-yd) */
  992. r2 = (_Float16)r2 + (_Float16)t1;
  993. /* (ya - yc) + (xb-xd) */
  994. s1 = (_Float16)s2 + (_Float16)t2;
  995. /* (ya - yc) - (xb-xd) */
  996. s2 = (_Float16)s2 - (_Float16)t2;
  997. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  998. pSrc[2U * i2] = (_Float16)r1 * (_Float16)onebyfftLen;
  999. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  1000. pSrc[(2U * i2) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
  1001. /* xd' = (xa+yb-xc-yd) * onebyfftLen */
  1002. pSrc[2U * i3] = (_Float16)r2 * (_Float16)onebyfftLen;
  1003. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  1004. pSrc[(2U * i3) + 1U] = (_Float16)s2 * (_Float16)onebyfftLen;
  1005. }
  1006. #endif /* #if defined (ARM_MATH_DSP) */
  1007. }
  1008. #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */