arm_cfft_radix4_f32.c 33 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix4_f32.c
  4. * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/transform_functions.h"
  29. extern void arm_bitreversal_f32(
  30. float32_t * pSrc,
  31. uint16_t fftSize,
  32. uint16_t bitRevFactor,
  33. const uint16_t * pBitRevTab);
  34. void arm_radix4_butterfly_f32(
  35. float32_t * pSrc,
  36. uint16_t fftLen,
  37. const float32_t * pCoef,
  38. uint16_t twidCoefModifier);
  39. void arm_radix4_butterfly_inverse_f32(
  40. float32_t * pSrc,
  41. uint16_t fftLen,
  42. const float32_t * pCoef,
  43. uint16_t twidCoefModifier,
  44. float32_t onebyfftLen);
  45. /**
  46. @addtogroup ComplexFFTDeprecated
  47. @{
  48. */
  49. /**
  50. @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
  51. @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
  52. @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
  53. @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  54. */
  55. void arm_cfft_radix4_f32(
  56. const arm_cfft_radix4_instance_f32 * S,
  57. float32_t * pSrc)
  58. {
  59. if (S->ifftFlag == 1U)
  60. {
  61. /* Complex IFFT radix-4 */
  62. arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  63. }
  64. else
  65. {
  66. /* Complex FFT radix-4 */
  67. arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  68. }
  69. if (S->bitReverseFlag == 1U)
  70. {
  71. /* Bit Reversal */
  72. arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  73. }
  74. }
  75. /**
  76. @} end of ComplexFFTDeprecated group
  77. */
  78. /* ----------------------------------------------------------------------
  79. * Internal helper function used by the FFTs
  80. * ---------------------------------------------------------------------- */
  81. /**
  82. brief Core function for the floating-point CFFT butterfly process.
  83. param[in,out] pSrc points to the in-place buffer of floating-point data type
  84. param[in] fftLen length of the FFT
  85. param[in] pCoef points to the twiddle coefficient buffer
  86. param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
  87. return none
  88. */
  89. void arm_radix4_butterfly_f32(
  90. float32_t * pSrc,
  91. uint16_t fftLen,
  92. const float32_t * pCoef,
  93. uint16_t twidCoefModifier)
  94. {
  95. float32_t co1, co2, co3, si1, si2, si3;
  96. uint32_t ia1, ia2, ia3;
  97. uint32_t i0, i1, i2, i3;
  98. uint32_t n1, n2, j, k;
  99. #if defined (ARM_MATH_LOOPUNROLL)
  100. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  101. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  102. Ybminusd;
  103. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  104. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  105. float32_t *ptr1;
  106. float32_t p0,p1,p2,p3,p4,p5;
  107. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  108. /* Initializations for the first stage */
  109. n2 = fftLen;
  110. n1 = n2;
  111. /* n2 = fftLen/4 */
  112. n2 >>= 2U;
  113. i0 = 0U;
  114. ia1 = 0U;
  115. j = n2;
  116. /* Calculation of first stage */
  117. do
  118. {
  119. /* index calculation for the input as, */
  120. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  121. i1 = i0 + n2;
  122. i2 = i1 + n2;
  123. i3 = i2 + n2;
  124. xaIn = pSrc[(2U * i0)];
  125. yaIn = pSrc[(2U * i0) + 1U];
  126. xbIn = pSrc[(2U * i1)];
  127. ybIn = pSrc[(2U * i1) + 1U];
  128. xcIn = pSrc[(2U * i2)];
  129. ycIn = pSrc[(2U * i2) + 1U];
  130. xdIn = pSrc[(2U * i3)];
  131. ydIn = pSrc[(2U * i3) + 1U];
  132. /* xa + xc */
  133. Xaplusc = xaIn + xcIn;
  134. /* xb + xd */
  135. Xbplusd = xbIn + xdIn;
  136. /* ya + yc */
  137. Yaplusc = yaIn + ycIn;
  138. /* yb + yd */
  139. Ybplusd = ybIn + ydIn;
  140. /* index calculation for the coefficients */
  141. ia2 = ia1 + ia1;
  142. co2 = pCoef[ia2 * 2U];
  143. si2 = pCoef[(ia2 * 2U) + 1U];
  144. /* xa - xc */
  145. Xaminusc = xaIn - xcIn;
  146. /* xb - xd */
  147. Xbminusd = xbIn - xdIn;
  148. /* ya - yc */
  149. Yaminusc = yaIn - ycIn;
  150. /* yb - yd */
  151. Ybminusd = ybIn - ydIn;
  152. /* xa' = xa + xb + xc + xd */
  153. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  154. /* ya' = ya + yb + yc + yd */
  155. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  156. /* (xa - xc) + (yb - yd) */
  157. Xb12C_out = (Xaminusc + Ybminusd);
  158. /* (ya - yc) + (xb - xd) */
  159. Yb12C_out = (Yaminusc - Xbminusd);
  160. /* (xa + xc) - (xb + xd) */
  161. Xc12C_out = (Xaplusc - Xbplusd);
  162. /* (ya + yc) - (yb + yd) */
  163. Yc12C_out = (Yaplusc - Ybplusd);
  164. /* (xa - xc) - (yb - yd) */
  165. Xd12C_out = (Xaminusc - Ybminusd);
  166. /* (ya - yc) + (xb - xd) */
  167. Yd12C_out = (Xbminusd + Yaminusc);
  168. co1 = pCoef[ia1 * 2U];
  169. si1 = pCoef[(ia1 * 2U) + 1U];
  170. /* index calculation for the coefficients */
  171. ia3 = ia2 + ia1;
  172. co3 = pCoef[ia3 * 2U];
  173. si3 = pCoef[(ia3 * 2U) + 1U];
  174. Xb12_out = Xb12C_out * co1;
  175. Yb12_out = Yb12C_out * co1;
  176. Xc12_out = Xc12C_out * co2;
  177. Yc12_out = Yc12C_out * co2;
  178. Xd12_out = Xd12C_out * co3;
  179. Yd12_out = Yd12C_out * co3;
  180. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  181. //Xb12_out -= Yb12C_out * si1;
  182. p0 = Yb12C_out * si1;
  183. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  184. //Yb12_out += Xb12C_out * si1;
  185. p1 = Xb12C_out * si1;
  186. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  187. //Xc12_out -= Yc12C_out * si2;
  188. p2 = Yc12C_out * si2;
  189. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  190. //Yc12_out += Xc12C_out * si2;
  191. p3 = Xc12C_out * si2;
  192. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  193. //Xd12_out -= Yd12C_out * si3;
  194. p4 = Yd12C_out * si3;
  195. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  196. //Yd12_out += Xd12C_out * si3;
  197. p5 = Xd12C_out * si3;
  198. Xb12_out += p0;
  199. Yb12_out -= p1;
  200. Xc12_out += p2;
  201. Yc12_out -= p3;
  202. Xd12_out += p4;
  203. Yd12_out -= p5;
  204. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  205. pSrc[2U * i1] = Xc12_out;
  206. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  207. pSrc[(2U * i1) + 1U] = Yc12_out;
  208. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  209. pSrc[2U * i2] = Xb12_out;
  210. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  211. pSrc[(2U * i2) + 1U] = Yb12_out;
  212. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  213. pSrc[2U * i3] = Xd12_out;
  214. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  215. pSrc[(2U * i3) + 1U] = Yd12_out;
  216. /* Twiddle coefficients index modifier */
  217. ia1 += twidCoefModifier;
  218. /* Updating input index */
  219. i0++;
  220. }
  221. while (--j);
  222. twidCoefModifier <<= 2U;
  223. /* Calculation of second stage to excluding last stage */
  224. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  225. {
  226. /* Initializations for the first stage */
  227. n1 = n2;
  228. n2 >>= 2U;
  229. ia1 = 0U;
  230. /* Calculation of first stage */
  231. j = 0;
  232. do
  233. {
  234. /* index calculation for the coefficients */
  235. ia2 = ia1 + ia1;
  236. ia3 = ia2 + ia1;
  237. co1 = pCoef[(ia1 * 2U)];
  238. si1 = pCoef[(ia1 * 2U) + 1U];
  239. co2 = pCoef[(ia2 * 2U)];
  240. si2 = pCoef[(ia2 * 2U) + 1U];
  241. co3 = pCoef[(ia3 * 2U)];
  242. si3 = pCoef[(ia3 * 2U) + 1U];
  243. /* Twiddle coefficients index modifier */
  244. ia1 += twidCoefModifier;
  245. i0 = j;
  246. do
  247. {
  248. /* index calculation for the input as, */
  249. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  250. i1 = i0 + n2;
  251. i2 = i1 + n2;
  252. i3 = i2 + n2;
  253. xaIn = pSrc[(2U * i0)];
  254. yaIn = pSrc[(2U * i0) + 1U];
  255. xbIn = pSrc[(2U * i1)];
  256. ybIn = pSrc[(2U * i1) + 1U];
  257. xcIn = pSrc[(2U * i2)];
  258. ycIn = pSrc[(2U * i2) + 1U];
  259. xdIn = pSrc[(2U * i3)];
  260. ydIn = pSrc[(2U * i3) + 1U];
  261. /* xa - xc */
  262. Xaminusc = xaIn - xcIn;
  263. /* (xb - xd) */
  264. Xbminusd = xbIn - xdIn;
  265. /* ya - yc */
  266. Yaminusc = yaIn - ycIn;
  267. /* (yb - yd) */
  268. Ybminusd = ybIn - ydIn;
  269. /* xa + xc */
  270. Xaplusc = xaIn + xcIn;
  271. /* xb + xd */
  272. Xbplusd = xbIn + xdIn;
  273. /* ya + yc */
  274. Yaplusc = yaIn + ycIn;
  275. /* yb + yd */
  276. Ybplusd = ybIn + ydIn;
  277. /* (xa - xc) + (yb - yd) */
  278. Xb12C_out = (Xaminusc + Ybminusd);
  279. /* (ya - yc) - (xb - xd) */
  280. Yb12C_out = (Yaminusc - Xbminusd);
  281. /* xa + xc -(xb + xd) */
  282. Xc12C_out = (Xaplusc - Xbplusd);
  283. /* (ya + yc) - (yb + yd) */
  284. Yc12C_out = (Yaplusc - Ybplusd);
  285. /* (xa - xc) - (yb - yd) */
  286. Xd12C_out = (Xaminusc - Ybminusd);
  287. /* (ya - yc) + (xb - xd) */
  288. Yd12C_out = (Xbminusd + Yaminusc);
  289. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  290. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  291. Xb12_out = Xb12C_out * co1;
  292. Yb12_out = Yb12C_out * co1;
  293. Xc12_out = Xc12C_out * co2;
  294. Yc12_out = Yc12C_out * co2;
  295. Xd12_out = Xd12C_out * co3;
  296. Yd12_out = Yd12C_out * co3;
  297. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  298. //Xb12_out -= Yb12C_out * si1;
  299. p0 = Yb12C_out * si1;
  300. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  301. //Yb12_out += Xb12C_out * si1;
  302. p1 = Xb12C_out * si1;
  303. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  304. //Xc12_out -= Yc12C_out * si2;
  305. p2 = Yc12C_out * si2;
  306. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  307. //Yc12_out += Xc12C_out * si2;
  308. p3 = Xc12C_out * si2;
  309. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  310. //Xd12_out -= Yd12C_out * si3;
  311. p4 = Yd12C_out * si3;
  312. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  313. //Yd12_out += Xd12C_out * si3;
  314. p5 = Xd12C_out * si3;
  315. Xb12_out += p0;
  316. Yb12_out -= p1;
  317. Xc12_out += p2;
  318. Yc12_out -= p3;
  319. Xd12_out += p4;
  320. Yd12_out -= p5;
  321. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  322. pSrc[2U * i1] = Xc12_out;
  323. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  324. pSrc[(2U * i1) + 1U] = Yc12_out;
  325. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  326. pSrc[2U * i2] = Xb12_out;
  327. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  328. pSrc[(2U * i2) + 1U] = Yb12_out;
  329. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  330. pSrc[2U * i3] = Xd12_out;
  331. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  332. pSrc[(2U * i3) + 1U] = Yd12_out;
  333. i0 += n1;
  334. } while (i0 < fftLen);
  335. j++;
  336. } while (j <= (n2 - 1U));
  337. twidCoefModifier <<= 2U;
  338. }
  339. j = fftLen >> 2;
  340. ptr1 = &pSrc[0];
  341. /* Calculations of last stage */
  342. do
  343. {
  344. xaIn = ptr1[0];
  345. yaIn = ptr1[1];
  346. xbIn = ptr1[2];
  347. ybIn = ptr1[3];
  348. xcIn = ptr1[4];
  349. ycIn = ptr1[5];
  350. xdIn = ptr1[6];
  351. ydIn = ptr1[7];
  352. /* xa + xc */
  353. Xaplusc = xaIn + xcIn;
  354. /* xa - xc */
  355. Xaminusc = xaIn - xcIn;
  356. /* ya + yc */
  357. Yaplusc = yaIn + ycIn;
  358. /* ya - yc */
  359. Yaminusc = yaIn - ycIn;
  360. /* xb + xd */
  361. Xbplusd = xbIn + xdIn;
  362. /* yb + yd */
  363. Ybplusd = ybIn + ydIn;
  364. /* (xb-xd) */
  365. Xbminusd = xbIn - xdIn;
  366. /* (yb-yd) */
  367. Ybminusd = ybIn - ydIn;
  368. /* xa' = xa + xb + xc + xd */
  369. a0 = (Xaplusc + Xbplusd);
  370. /* ya' = ya + yb + yc + yd */
  371. a1 = (Yaplusc + Ybplusd);
  372. /* xc' = (xa-xb+xc-xd) */
  373. a2 = (Xaplusc - Xbplusd);
  374. /* yc' = (ya-yb+yc-yd) */
  375. a3 = (Yaplusc - Ybplusd);
  376. /* xb' = (xa+yb-xc-yd) */
  377. a4 = (Xaminusc + Ybminusd);
  378. /* yb' = (ya-xb-yc+xd) */
  379. a5 = (Yaminusc - Xbminusd);
  380. /* xd' = (xa-yb-xc+yd)) */
  381. a6 = (Xaminusc - Ybminusd);
  382. /* yd' = (ya+xb-yc-xd) */
  383. a7 = (Xbminusd + Yaminusc);
  384. ptr1[0] = a0;
  385. ptr1[1] = a1;
  386. ptr1[2] = a2;
  387. ptr1[3] = a3;
  388. ptr1[4] = a4;
  389. ptr1[5] = a5;
  390. ptr1[6] = a6;
  391. ptr1[7] = a7;
  392. /* increment pointer by 8 */
  393. ptr1 += 8U;
  394. } while (--j);
  395. #else
  396. float32_t t1, t2, r1, r2, s1, s2;
  397. /* Initializations for the fft calculation */
  398. n2 = fftLen;
  399. n1 = n2;
  400. for (k = fftLen; k > 1U; k >>= 2U)
  401. {
  402. /* Initializations for the fft calculation */
  403. n1 = n2;
  404. n2 >>= 2U;
  405. ia1 = 0U;
  406. /* FFT Calculation */
  407. j = 0;
  408. do
  409. {
  410. /* index calculation for the coefficients */
  411. ia2 = ia1 + ia1;
  412. ia3 = ia2 + ia1;
  413. co1 = pCoef[ia1 * 2U];
  414. si1 = pCoef[(ia1 * 2U) + 1U];
  415. co2 = pCoef[ia2 * 2U];
  416. si2 = pCoef[(ia2 * 2U) + 1U];
  417. co3 = pCoef[ia3 * 2U];
  418. si3 = pCoef[(ia3 * 2U) + 1U];
  419. /* Twiddle coefficients index modifier */
  420. ia1 = ia1 + twidCoefModifier;
  421. i0 = j;
  422. do
  423. {
  424. /* index calculation for the input as, */
  425. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  426. i1 = i0 + n2;
  427. i2 = i1 + n2;
  428. i3 = i2 + n2;
  429. /* xa + xc */
  430. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  431. /* xa - xc */
  432. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  433. /* ya + yc */
  434. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  435. /* ya - yc */
  436. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  437. /* xb + xd */
  438. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  439. /* xa' = xa + xb + xc + xd */
  440. pSrc[2U * i0] = r1 + t1;
  441. /* xa + xc -(xb + xd) */
  442. r1 = r1 - t1;
  443. /* yb + yd */
  444. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  445. /* ya' = ya + yb + yc + yd */
  446. pSrc[(2U * i0) + 1U] = s1 + t2;
  447. /* (ya + yc) - (yb + yd) */
  448. s1 = s1 - t2;
  449. /* (yb - yd) */
  450. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  451. /* (xb - xd) */
  452. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  453. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  454. pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
  455. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  456. pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
  457. /* (xa - xc) + (yb - yd) */
  458. r1 = r2 + t1;
  459. /* (xa - xc) - (yb - yd) */
  460. r2 = r2 - t1;
  461. /* (ya - yc) - (xb - xd) */
  462. s1 = s2 - t2;
  463. /* (ya - yc) + (xb - xd) */
  464. s2 = s2 + t2;
  465. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  466. pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
  467. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  468. pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
  469. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  470. pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
  471. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  472. pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
  473. i0 += n1;
  474. } while ( i0 < fftLen);
  475. j++;
  476. } while (j <= (n2 - 1U));
  477. twidCoefModifier <<= 2U;
  478. }
  479. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  480. }
  481. /**
  482. brief Core function for the floating-point CIFFT butterfly process.
  483. param[in,out] pSrc points to the in-place buffer of floating-point data type
  484. param[in] fftLen length of the FFT
  485. param[in] pCoef points to twiddle coefficient buffer
  486. param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  487. param[in] onebyfftLen value of 1/fftLen
  488. return none
  489. */
  490. void arm_radix4_butterfly_inverse_f32(
  491. float32_t * pSrc,
  492. uint16_t fftLen,
  493. const float32_t * pCoef,
  494. uint16_t twidCoefModifier,
  495. float32_t onebyfftLen)
  496. {
  497. float32_t co1, co2, co3, si1, si2, si3;
  498. uint32_t ia1, ia2, ia3;
  499. uint32_t i0, i1, i2, i3;
  500. uint32_t n1, n2, j, k;
  501. #if defined (ARM_MATH_LOOPUNROLL)
  502. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  503. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  504. Ybminusd;
  505. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  506. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  507. float32_t *ptr1;
  508. float32_t p0,p1,p2,p3,p4,p5,p6,p7;
  509. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  510. /* Initializations for the first stage */
  511. n2 = fftLen;
  512. n1 = n2;
  513. /* n2 = fftLen/4 */
  514. n2 >>= 2U;
  515. i0 = 0U;
  516. ia1 = 0U;
  517. j = n2;
  518. /* Calculation of first stage */
  519. do
  520. {
  521. /* index calculation for the input as, */
  522. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  523. i1 = i0 + n2;
  524. i2 = i1 + n2;
  525. i3 = i2 + n2;
  526. /* Butterfly implementation */
  527. xaIn = pSrc[(2U * i0)];
  528. yaIn = pSrc[(2U * i0) + 1U];
  529. xcIn = pSrc[(2U * i2)];
  530. ycIn = pSrc[(2U * i2) + 1U];
  531. xbIn = pSrc[(2U * i1)];
  532. ybIn = pSrc[(2U * i1) + 1U];
  533. xdIn = pSrc[(2U * i3)];
  534. ydIn = pSrc[(2U * i3) + 1U];
  535. /* xa + xc */
  536. Xaplusc = xaIn + xcIn;
  537. /* xb + xd */
  538. Xbplusd = xbIn + xdIn;
  539. /* ya + yc */
  540. Yaplusc = yaIn + ycIn;
  541. /* yb + yd */
  542. Ybplusd = ybIn + ydIn;
  543. /* index calculation for the coefficients */
  544. ia2 = ia1 + ia1;
  545. co2 = pCoef[ia2 * 2U];
  546. si2 = pCoef[(ia2 * 2U) + 1U];
  547. /* xa - xc */
  548. Xaminusc = xaIn - xcIn;
  549. /* xb - xd */
  550. Xbminusd = xbIn - xdIn;
  551. /* ya - yc */
  552. Yaminusc = yaIn - ycIn;
  553. /* yb - yd */
  554. Ybminusd = ybIn - ydIn;
  555. /* xa' = xa + xb + xc + xd */
  556. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  557. /* ya' = ya + yb + yc + yd */
  558. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  559. /* (xa - xc) - (yb - yd) */
  560. Xb12C_out = (Xaminusc - Ybminusd);
  561. /* (ya - yc) + (xb - xd) */
  562. Yb12C_out = (Yaminusc + Xbminusd);
  563. /* (xa + xc) - (xb + xd) */
  564. Xc12C_out = (Xaplusc - Xbplusd);
  565. /* (ya + yc) - (yb + yd) */
  566. Yc12C_out = (Yaplusc - Ybplusd);
  567. /* (xa - xc) + (yb - yd) */
  568. Xd12C_out = (Xaminusc + Ybminusd);
  569. /* (ya - yc) - (xb - xd) */
  570. Yd12C_out = (Yaminusc - Xbminusd);
  571. co1 = pCoef[ia1 * 2U];
  572. si1 = pCoef[(ia1 * 2U) + 1U];
  573. /* index calculation for the coefficients */
  574. ia3 = ia2 + ia1;
  575. co3 = pCoef[ia3 * 2U];
  576. si3 = pCoef[(ia3 * 2U) + 1U];
  577. Xb12_out = Xb12C_out * co1;
  578. Yb12_out = Yb12C_out * co1;
  579. Xc12_out = Xc12C_out * co2;
  580. Yc12_out = Yc12C_out * co2;
  581. Xd12_out = Xd12C_out * co3;
  582. Yd12_out = Yd12C_out * co3;
  583. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  584. //Xb12_out -= Yb12C_out * si1;
  585. p0 = Yb12C_out * si1;
  586. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  587. //Yb12_out += Xb12C_out * si1;
  588. p1 = Xb12C_out * si1;
  589. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  590. //Xc12_out -= Yc12C_out * si2;
  591. p2 = Yc12C_out * si2;
  592. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  593. //Yc12_out += Xc12C_out * si2;
  594. p3 = Xc12C_out * si2;
  595. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  596. //Xd12_out -= Yd12C_out * si3;
  597. p4 = Yd12C_out * si3;
  598. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  599. //Yd12_out += Xd12C_out * si3;
  600. p5 = Xd12C_out * si3;
  601. Xb12_out -= p0;
  602. Yb12_out += p1;
  603. Xc12_out -= p2;
  604. Yc12_out += p3;
  605. Xd12_out -= p4;
  606. Yd12_out += p5;
  607. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  608. pSrc[2U * i1] = Xc12_out;
  609. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  610. pSrc[(2U * i1) + 1U] = Yc12_out;
  611. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  612. pSrc[2U * i2] = Xb12_out;
  613. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  614. pSrc[(2U * i2) + 1U] = Yb12_out;
  615. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  616. pSrc[2U * i3] = Xd12_out;
  617. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  618. pSrc[(2U * i3) + 1U] = Yd12_out;
  619. /* Twiddle coefficients index modifier */
  620. ia1 = ia1 + twidCoefModifier;
  621. /* Updating input index */
  622. i0 = i0 + 1U;
  623. } while (--j);
  624. twidCoefModifier <<= 2U;
  625. /* Calculation of second stage to excluding last stage */
  626. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  627. {
  628. /* Initializations for the first stage */
  629. n1 = n2;
  630. n2 >>= 2U;
  631. ia1 = 0U;
  632. /* Calculation of first stage */
  633. j = 0;
  634. do
  635. {
  636. /* index calculation for the coefficients */
  637. ia2 = ia1 + ia1;
  638. ia3 = ia2 + ia1;
  639. co1 = pCoef[ia1 * 2U];
  640. si1 = pCoef[(ia1 * 2U) + 1U];
  641. co2 = pCoef[ia2 * 2U];
  642. si2 = pCoef[(ia2 * 2U) + 1U];
  643. co3 = pCoef[ia3 * 2U];
  644. si3 = pCoef[(ia3 * 2U) + 1U];
  645. /* Twiddle coefficients index modifier */
  646. ia1 = ia1 + twidCoefModifier;
  647. i0 = j;
  648. do
  649. {
  650. /* index calculation for the input as, */
  651. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  652. i1 = i0 + n2;
  653. i2 = i1 + n2;
  654. i3 = i2 + n2;
  655. xaIn = pSrc[(2U * i0)];
  656. yaIn = pSrc[(2U * i0) + 1U];
  657. xbIn = pSrc[(2U * i1)];
  658. ybIn = pSrc[(2U * i1) + 1U];
  659. xcIn = pSrc[(2U * i2)];
  660. ycIn = pSrc[(2U * i2) + 1U];
  661. xdIn = pSrc[(2U * i3)];
  662. ydIn = pSrc[(2U * i3) + 1U];
  663. /* xa - xc */
  664. Xaminusc = xaIn - xcIn;
  665. /* (xb - xd) */
  666. Xbminusd = xbIn - xdIn;
  667. /* ya - yc */
  668. Yaminusc = yaIn - ycIn;
  669. /* (yb - yd) */
  670. Ybminusd = ybIn - ydIn;
  671. /* xa + xc */
  672. Xaplusc = xaIn + xcIn;
  673. /* xb + xd */
  674. Xbplusd = xbIn + xdIn;
  675. /* ya + yc */
  676. Yaplusc = yaIn + ycIn;
  677. /* yb + yd */
  678. Ybplusd = ybIn + ydIn;
  679. /* (xa - xc) - (yb - yd) */
  680. Xb12C_out = (Xaminusc - Ybminusd);
  681. /* (ya - yc) + (xb - xd) */
  682. Yb12C_out = (Yaminusc + Xbminusd);
  683. /* xa + xc -(xb + xd) */
  684. Xc12C_out = (Xaplusc - Xbplusd);
  685. /* (ya + yc) - (yb + yd) */
  686. Yc12C_out = (Yaplusc - Ybplusd);
  687. /* (xa - xc) + (yb - yd) */
  688. Xd12C_out = (Xaminusc + Ybminusd);
  689. /* (ya - yc) - (xb - xd) */
  690. Yd12C_out = (Yaminusc - Xbminusd);
  691. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  692. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  693. Xb12_out = Xb12C_out * co1;
  694. Yb12_out = Yb12C_out * co1;
  695. Xc12_out = Xc12C_out * co2;
  696. Yc12_out = Yc12C_out * co2;
  697. Xd12_out = Xd12C_out * co3;
  698. Yd12_out = Yd12C_out * co3;
  699. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  700. //Xb12_out -= Yb12C_out * si1;
  701. p0 = Yb12C_out * si1;
  702. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  703. //Yb12_out += Xb12C_out * si1;
  704. p1 = Xb12C_out * si1;
  705. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  706. //Xc12_out -= Yc12C_out * si2;
  707. p2 = Yc12C_out * si2;
  708. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  709. //Yc12_out += Xc12C_out * si2;
  710. p3 = Xc12C_out * si2;
  711. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  712. //Xd12_out -= Yd12C_out * si3;
  713. p4 = Yd12C_out * si3;
  714. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  715. //Yd12_out += Xd12C_out * si3;
  716. p5 = Xd12C_out * si3;
  717. Xb12_out -= p0;
  718. Yb12_out += p1;
  719. Xc12_out -= p2;
  720. Yc12_out += p3;
  721. Xd12_out -= p4;
  722. Yd12_out += p5;
  723. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  724. pSrc[2U * i1] = Xc12_out;
  725. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  726. pSrc[(2U * i1) + 1U] = Yc12_out;
  727. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  728. pSrc[2U * i2] = Xb12_out;
  729. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  730. pSrc[(2U * i2) + 1U] = Yb12_out;
  731. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  732. pSrc[2U * i3] = Xd12_out;
  733. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  734. pSrc[(2U * i3) + 1U] = Yd12_out;
  735. i0 += n1;
  736. } while (i0 < fftLen);
  737. j++;
  738. } while (j <= (n2 - 1U));
  739. twidCoefModifier <<= 2U;
  740. }
  741. /* Initializations of last stage */
  742. j = fftLen >> 2;
  743. ptr1 = &pSrc[0];
  744. /* Calculations of last stage */
  745. do
  746. {
  747. xaIn = ptr1[0];
  748. yaIn = ptr1[1];
  749. xbIn = ptr1[2];
  750. ybIn = ptr1[3];
  751. xcIn = ptr1[4];
  752. ycIn = ptr1[5];
  753. xdIn = ptr1[6];
  754. ydIn = ptr1[7];
  755. /* Butterfly implementation */
  756. /* xa + xc */
  757. Xaplusc = xaIn + xcIn;
  758. /* xa - xc */
  759. Xaminusc = xaIn - xcIn;
  760. /* ya + yc */
  761. Yaplusc = yaIn + ycIn;
  762. /* ya - yc */
  763. Yaminusc = yaIn - ycIn;
  764. /* xb + xd */
  765. Xbplusd = xbIn + xdIn;
  766. /* yb + yd */
  767. Ybplusd = ybIn + ydIn;
  768. /* (xb-xd) */
  769. Xbminusd = xbIn - xdIn;
  770. /* (yb-yd) */
  771. Ybminusd = ybIn - ydIn;
  772. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  773. a0 = (Xaplusc + Xbplusd);
  774. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  775. a1 = (Yaplusc + Ybplusd);
  776. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  777. a2 = (Xaplusc - Xbplusd);
  778. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  779. a3 = (Yaplusc - Ybplusd);
  780. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  781. a4 = (Xaminusc - Ybminusd);
  782. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  783. a5 = (Yaminusc + Xbminusd);
  784. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  785. a6 = (Xaminusc + Ybminusd);
  786. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  787. a7 = (Yaminusc - Xbminusd);
  788. p0 = a0 * onebyfftLen;
  789. p1 = a1 * onebyfftLen;
  790. p2 = a2 * onebyfftLen;
  791. p3 = a3 * onebyfftLen;
  792. p4 = a4 * onebyfftLen;
  793. p5 = a5 * onebyfftLen;
  794. p6 = a6 * onebyfftLen;
  795. p7 = a7 * onebyfftLen;
  796. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  797. ptr1[0] = p0;
  798. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  799. ptr1[1] = p1;
  800. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  801. ptr1[2] = p2;
  802. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  803. ptr1[3] = p3;
  804. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  805. ptr1[4] = p4;
  806. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  807. ptr1[5] = p5;
  808. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  809. ptr1[6] = p6;
  810. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  811. ptr1[7] = p7;
  812. /* increment source pointer by 8 for next calculations */
  813. ptr1 = ptr1 + 8U;
  814. } while (--j);
  815. #else
  816. float32_t t1, t2, r1, r2, s1, s2;
  817. /* Initializations for the first stage */
  818. n2 = fftLen;
  819. n1 = n2;
  820. /* Calculation of first stage */
  821. for (k = fftLen; k > 4U; k >>= 2U)
  822. {
  823. /* Initializations for the first stage */
  824. n1 = n2;
  825. n2 >>= 2U;
  826. ia1 = 0U;
  827. /* Calculation of first stage */
  828. j = 0;
  829. do
  830. {
  831. /* index calculation for the coefficients */
  832. ia2 = ia1 + ia1;
  833. ia3 = ia2 + ia1;
  834. co1 = pCoef[ia1 * 2U];
  835. si1 = pCoef[(ia1 * 2U) + 1U];
  836. co2 = pCoef[ia2 * 2U];
  837. si2 = pCoef[(ia2 * 2U) + 1U];
  838. co3 = pCoef[ia3 * 2U];
  839. si3 = pCoef[(ia3 * 2U) + 1U];
  840. /* Twiddle coefficients index modifier */
  841. ia1 = ia1 + twidCoefModifier;
  842. i0 = j;
  843. do
  844. {
  845. /* index calculation for the input as, */
  846. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  847. i1 = i0 + n2;
  848. i2 = i1 + n2;
  849. i3 = i2 + n2;
  850. /* xa + xc */
  851. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  852. /* xa - xc */
  853. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  854. /* ya + yc */
  855. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  856. /* ya - yc */
  857. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  858. /* xb + xd */
  859. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  860. /* xa' = xa + xb + xc + xd */
  861. pSrc[2U * i0] = r1 + t1;
  862. /* xa + xc -(xb + xd) */
  863. r1 = r1 - t1;
  864. /* yb + yd */
  865. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  866. /* ya' = ya + yb + yc + yd */
  867. pSrc[(2U * i0) + 1U] = s1 + t2;
  868. /* (ya + yc) - (yb + yd) */
  869. s1 = s1 - t2;
  870. /* (yb - yd) */
  871. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  872. /* (xb - xd) */
  873. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  874. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  875. pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
  876. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  877. pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
  878. /* (xa - xc) - (yb - yd) */
  879. r1 = r2 - t1;
  880. /* (xa - xc) + (yb - yd) */
  881. r2 = r2 + t1;
  882. /* (ya - yc) + (xb - xd) */
  883. s1 = s2 + t2;
  884. /* (ya - yc) - (xb - xd) */
  885. s2 = s2 - t2;
  886. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  887. pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
  888. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  889. pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
  890. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  891. pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
  892. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  893. pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
  894. i0 += n1;
  895. } while ( i0 < fftLen);
  896. j++;
  897. } while (j <= (n2 - 1U));
  898. twidCoefModifier <<= 2U;
  899. }
  900. /* Initializations of last stage */
  901. n1 = n2;
  902. n2 >>= 2U;
  903. /* Calculations of last stage */
  904. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  905. {
  906. /* index calculation for the input as, */
  907. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  908. i1 = i0 + n2;
  909. i2 = i1 + n2;
  910. i3 = i2 + n2;
  911. /* Butterfly implementation */
  912. /* xa + xc */
  913. r1 = pSrc[2U * i0] + pSrc[2U * i2];
  914. /* xa - xc */
  915. r2 = pSrc[2U * i0] - pSrc[2U * i2];
  916. /* ya + yc */
  917. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  918. /* ya - yc */
  919. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  920. /* xc + xd */
  921. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  922. /* xa' = xa + xb + xc + xd */
  923. pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
  924. /* (xa + xb) - (xc + xd) */
  925. r1 = r1 - t1;
  926. /* yb + yd */
  927. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  928. /* ya' = ya + yb + yc + yd */
  929. pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
  930. /* (ya + yc) - (yb + yd) */
  931. s1 = s1 - t2;
  932. /* (yb-yd) */
  933. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  934. /* (xb-xd) */
  935. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  936. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  937. pSrc[2U * i1] = r1 * onebyfftLen;
  938. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  939. pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
  940. /* (xa - xc) - (yb-yd) */
  941. r1 = r2 - t1;
  942. /* (xa - xc) + (yb-yd) */
  943. r2 = r2 + t1;
  944. /* (ya - yc) + (xb-xd) */
  945. s1 = s2 + t2;
  946. /* (ya - yc) - (xb-xd) */
  947. s2 = s2 - t2;
  948. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  949. pSrc[2U * i2] = r1 * onebyfftLen;
  950. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  951. pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
  952. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  953. pSrc[2U * i3] = r2 * onebyfftLen;
  954. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  955. pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
  956. }
  957. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  958. }