arm_cfft_radix4_f32.c 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix4_f32.c
  4. * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/transform_functions.h"
  29. extern void arm_bitreversal_f32(
  30. float32_t * pSrc,
  31. uint16_t fftSize,
  32. uint16_t bitRevFactor,
  33. const uint16_t * pBitRevTab);
  34. void arm_radix4_butterfly_f32(
  35. float32_t * pSrc,
  36. uint16_t fftLen,
  37. const float32_t * pCoef,
  38. uint16_t twidCoefModifier);
  39. void arm_radix4_butterfly_inverse_f32(
  40. float32_t * pSrc,
  41. uint16_t fftLen,
  42. const float32_t * pCoef,
  43. uint16_t twidCoefModifier,
  44. float32_t onebyfftLen);
  45. /**
  46. @ingroup groupTransforms
  47. */
  48. /**
  49. @addtogroup ComplexFFT
  50. @{
  51. */
  52. /**
  53. @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
  54. @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
  55. @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
  56. @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  57. @return none
  58. */
  59. void arm_cfft_radix4_f32(
  60. const arm_cfft_radix4_instance_f32 * S,
  61. float32_t * pSrc)
  62. {
  63. if (S->ifftFlag == 1U)
  64. {
  65. /* Complex IFFT radix-4 */
  66. arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  67. }
  68. else
  69. {
  70. /* Complex FFT radix-4 */
  71. arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  72. }
  73. if (S->bitReverseFlag == 1U)
  74. {
  75. /* Bit Reversal */
  76. arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  77. }
  78. }
  79. /**
  80. @} end of ComplexFFT group
  81. */
  82. /* ----------------------------------------------------------------------
  83. * Internal helper function used by the FFTs
  84. * ---------------------------------------------------------------------- */
  85. /**
  86. brief Core function for the floating-point CFFT butterfly process.
  87. param[in,out] pSrc points to the in-place buffer of floating-point data type
  88. param[in] fftLen length of the FFT
  89. param[in] pCoef points to the twiddle coefficient buffer
  90. param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
  91. return none
  92. */
  93. void arm_radix4_butterfly_f32(
  94. float32_t * pSrc,
  95. uint16_t fftLen,
  96. const float32_t * pCoef,
  97. uint16_t twidCoefModifier)
  98. {
  99. float32_t co1, co2, co3, si1, si2, si3;
  100. uint32_t ia1, ia2, ia3;
  101. uint32_t i0, i1, i2, i3;
  102. uint32_t n1, n2, j, k;
  103. #if defined (ARM_MATH_LOOPUNROLL)
  104. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  105. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  106. Ybminusd;
  107. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  108. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  109. float32_t *ptr1;
  110. float32_t p0,p1,p2,p3,p4,p5;
  111. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  112. /* Initializations for the first stage */
  113. n2 = fftLen;
  114. n1 = n2;
  115. /* n2 = fftLen/4 */
  116. n2 >>= 2U;
  117. i0 = 0U;
  118. ia1 = 0U;
  119. j = n2;
  120. /* Calculation of first stage */
  121. do
  122. {
  123. /* index calculation for the input as, */
  124. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  125. i1 = i0 + n2;
  126. i2 = i1 + n2;
  127. i3 = i2 + n2;
  128. xaIn = pSrc[(2U * i0)];
  129. yaIn = pSrc[(2U * i0) + 1U];
  130. xbIn = pSrc[(2U * i1)];
  131. ybIn = pSrc[(2U * i1) + 1U];
  132. xcIn = pSrc[(2U * i2)];
  133. ycIn = pSrc[(2U * i2) + 1U];
  134. xdIn = pSrc[(2U * i3)];
  135. ydIn = pSrc[(2U * i3) + 1U];
  136. /* xa + xc */
  137. Xaplusc = xaIn + xcIn;
  138. /* xb + xd */
  139. Xbplusd = xbIn + xdIn;
  140. /* ya + yc */
  141. Yaplusc = yaIn + ycIn;
  142. /* yb + yd */
  143. Ybplusd = ybIn + ydIn;
  144. /* index calculation for the coefficients */
  145. ia2 = ia1 + ia1;
  146. co2 = pCoef[ia2 * 2U];
  147. si2 = pCoef[(ia2 * 2U) + 1U];
  148. /* xa - xc */
  149. Xaminusc = xaIn - xcIn;
  150. /* xb - xd */
  151. Xbminusd = xbIn - xdIn;
  152. /* ya - yc */
  153. Yaminusc = yaIn - ycIn;
  154. /* yb - yd */
  155. Ybminusd = ybIn - ydIn;
  156. /* xa' = xa + xb + xc + xd */
  157. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  158. /* ya' = ya + yb + yc + yd */
  159. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  160. /* (xa - xc) + (yb - yd) */
  161. Xb12C_out = (Xaminusc + Ybminusd);
  162. /* (ya - yc) + (xb - xd) */
  163. Yb12C_out = (Yaminusc - Xbminusd);
  164. /* (xa + xc) - (xb + xd) */
  165. Xc12C_out = (Xaplusc - Xbplusd);
  166. /* (ya + yc) - (yb + yd) */
  167. Yc12C_out = (Yaplusc - Ybplusd);
  168. /* (xa - xc) - (yb - yd) */
  169. Xd12C_out = (Xaminusc - Ybminusd);
  170. /* (ya - yc) + (xb - xd) */
  171. Yd12C_out = (Xbminusd + Yaminusc);
  172. co1 = pCoef[ia1 * 2U];
  173. si1 = pCoef[(ia1 * 2U) + 1U];
  174. /* index calculation for the coefficients */
  175. ia3 = ia2 + ia1;
  176. co3 = pCoef[ia3 * 2U];
  177. si3 = pCoef[(ia3 * 2U) + 1U];
  178. Xb12_out = Xb12C_out * co1;
  179. Yb12_out = Yb12C_out * co1;
  180. Xc12_out = Xc12C_out * co2;
  181. Yc12_out = Yc12C_out * co2;
  182. Xd12_out = Xd12C_out * co3;
  183. Yd12_out = Yd12C_out * co3;
  184. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  185. //Xb12_out -= Yb12C_out * si1;
  186. p0 = Yb12C_out * si1;
  187. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  188. //Yb12_out += Xb12C_out * si1;
  189. p1 = Xb12C_out * si1;
  190. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  191. //Xc12_out -= Yc12C_out * si2;
  192. p2 = Yc12C_out * si2;
  193. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  194. //Yc12_out += Xc12C_out * si2;
  195. p3 = Xc12C_out * si2;
  196. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  197. //Xd12_out -= Yd12C_out * si3;
  198. p4 = Yd12C_out * si3;
  199. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  200. //Yd12_out += Xd12C_out * si3;
  201. p5 = Xd12C_out * si3;
  202. Xb12_out += p0;
  203. Yb12_out -= p1;
  204. Xc12_out += p2;
  205. Yc12_out -= p3;
  206. Xd12_out += p4;
  207. Yd12_out -= p5;
  208. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  209. pSrc[2U * i1] = Xc12_out;
  210. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  211. pSrc[(2U * i1) + 1U] = Yc12_out;
  212. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  213. pSrc[2U * i2] = Xb12_out;
  214. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  215. pSrc[(2U * i2) + 1U] = Yb12_out;
  216. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  217. pSrc[2U * i3] = Xd12_out;
  218. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  219. pSrc[(2U * i3) + 1U] = Yd12_out;
  220. /* Twiddle coefficients index modifier */
  221. ia1 += twidCoefModifier;
  222. /* Updating input index */
  223. i0++;
  224. }
  225. while (--j);
  226. twidCoefModifier <<= 2U;
  227. /* Calculation of second stage to excluding last stage */
  228. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  229. {
  230. /* Initializations for the first stage */
  231. n1 = n2;
  232. n2 >>= 2U;
  233. ia1 = 0U;
  234. /* Calculation of first stage */
  235. j = 0;
  236. do
  237. {
  238. /* index calculation for the coefficients */
  239. ia2 = ia1 + ia1;
  240. ia3 = ia2 + ia1;
  241. co1 = pCoef[(ia1 * 2U)];
  242. si1 = pCoef[(ia1 * 2U) + 1U];
  243. co2 = pCoef[(ia2 * 2U)];
  244. si2 = pCoef[(ia2 * 2U) + 1U];
  245. co3 = pCoef[(ia3 * 2U)];
  246. si3 = pCoef[(ia3 * 2U) + 1U];
  247. /* Twiddle coefficients index modifier */
  248. ia1 += twidCoefModifier;
  249. i0 = j;
  250. do
  251. {
  252. /* index calculation for the input as, */
  253. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  254. i1 = i0 + n2;
  255. i2 = i1 + n2;
  256. i3 = i2 + n2;
  257. xaIn = pSrc[(2U * i0)];
  258. yaIn = pSrc[(2U * i0) + 1U];
  259. xbIn = pSrc[(2U * i1)];
  260. ybIn = pSrc[(2U * i1) + 1U];
  261. xcIn = pSrc[(2U * i2)];
  262. ycIn = pSrc[(2U * i2) + 1U];
  263. xdIn = pSrc[(2U * i3)];
  264. ydIn = pSrc[(2U * i3) + 1U];
  265. /* xa - xc */
  266. Xaminusc = xaIn - xcIn;
  267. /* (xb - xd) */
  268. Xbminusd = xbIn - xdIn;
  269. /* ya - yc */
  270. Yaminusc = yaIn - ycIn;
  271. /* (yb - yd) */
  272. Ybminusd = ybIn - ydIn;
  273. /* xa + xc */
  274. Xaplusc = xaIn + xcIn;
  275. /* xb + xd */
  276. Xbplusd = xbIn + xdIn;
  277. /* ya + yc */
  278. Yaplusc = yaIn + ycIn;
  279. /* yb + yd */
  280. Ybplusd = ybIn + ydIn;
  281. /* (xa - xc) + (yb - yd) */
  282. Xb12C_out = (Xaminusc + Ybminusd);
  283. /* (ya - yc) - (xb - xd) */
  284. Yb12C_out = (Yaminusc - Xbminusd);
  285. /* xa + xc -(xb + xd) */
  286. Xc12C_out = (Xaplusc - Xbplusd);
  287. /* (ya + yc) - (yb + yd) */
  288. Yc12C_out = (Yaplusc - Ybplusd);
  289. /* (xa - xc) - (yb - yd) */
  290. Xd12C_out = (Xaminusc - Ybminusd);
  291. /* (ya - yc) + (xb - xd) */
  292. Yd12C_out = (Xbminusd + Yaminusc);
  293. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  294. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  295. Xb12_out = Xb12C_out * co1;
  296. Yb12_out = Yb12C_out * co1;
  297. Xc12_out = Xc12C_out * co2;
  298. Yc12_out = Yc12C_out * co2;
  299. Xd12_out = Xd12C_out * co3;
  300. Yd12_out = Yd12C_out * co3;
  301. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  302. //Xb12_out -= Yb12C_out * si1;
  303. p0 = Yb12C_out * si1;
  304. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  305. //Yb12_out += Xb12C_out * si1;
  306. p1 = Xb12C_out * si1;
  307. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  308. //Xc12_out -= Yc12C_out * si2;
  309. p2 = Yc12C_out * si2;
  310. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  311. //Yc12_out += Xc12C_out * si2;
  312. p3 = Xc12C_out * si2;
  313. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  314. //Xd12_out -= Yd12C_out * si3;
  315. p4 = Yd12C_out * si3;
  316. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  317. //Yd12_out += Xd12C_out * si3;
  318. p5 = Xd12C_out * si3;
  319. Xb12_out += p0;
  320. Yb12_out -= p1;
  321. Xc12_out += p2;
  322. Yc12_out -= p3;
  323. Xd12_out += p4;
  324. Yd12_out -= p5;
  325. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  326. pSrc[2U * i1] = Xc12_out;
  327. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  328. pSrc[(2U * i1) + 1U] = Yc12_out;
  329. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  330. pSrc[2U * i2] = Xb12_out;
  331. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  332. pSrc[(2U * i2) + 1U] = Yb12_out;
  333. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  334. pSrc[2U * i3] = Xd12_out;
  335. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  336. pSrc[(2U * i3) + 1U] = Yd12_out;
  337. i0 += n1;
  338. } while (i0 < fftLen);
  339. j++;
  340. } while (j <= (n2 - 1U));
  341. twidCoefModifier <<= 2U;
  342. }
  343. j = fftLen >> 2;
  344. ptr1 = &pSrc[0];
  345. /* Calculations of last stage */
  346. do
  347. {
  348. xaIn = ptr1[0];
  349. yaIn = ptr1[1];
  350. xbIn = ptr1[2];
  351. ybIn = ptr1[3];
  352. xcIn = ptr1[4];
  353. ycIn = ptr1[5];
  354. xdIn = ptr1[6];
  355. ydIn = ptr1[7];
  356. /* xa + xc */
  357. Xaplusc = xaIn + xcIn;
  358. /* xa - xc */
  359. Xaminusc = xaIn - xcIn;
  360. /* ya + yc */
  361. Yaplusc = yaIn + ycIn;
  362. /* ya - yc */
  363. Yaminusc = yaIn - ycIn;
  364. /* xb + xd */
  365. Xbplusd = xbIn + xdIn;
  366. /* yb + yd */
  367. Ybplusd = ybIn + ydIn;
  368. /* (xb-xd) */
  369. Xbminusd = xbIn - xdIn;
  370. /* (yb-yd) */
  371. Ybminusd = ybIn - ydIn;
  372. /* xa' = xa + xb + xc + xd */
  373. a0 = (Xaplusc + Xbplusd);
  374. /* ya' = ya + yb + yc + yd */
  375. a1 = (Yaplusc + Ybplusd);
  376. /* xc' = (xa-xb+xc-xd) */
  377. a2 = (Xaplusc - Xbplusd);
  378. /* yc' = (ya-yb+yc-yd) */
  379. a3 = (Yaplusc - Ybplusd);
  380. /* xb' = (xa+yb-xc-yd) */
  381. a4 = (Xaminusc + Ybminusd);
  382. /* yb' = (ya-xb-yc+xd) */
  383. a5 = (Yaminusc - Xbminusd);
  384. /* xd' = (xa-yb-xc+yd)) */
  385. a6 = (Xaminusc - Ybminusd);
  386. /* yd' = (ya+xb-yc-xd) */
  387. a7 = (Xbminusd + Yaminusc);
  388. ptr1[0] = a0;
  389. ptr1[1] = a1;
  390. ptr1[2] = a2;
  391. ptr1[3] = a3;
  392. ptr1[4] = a4;
  393. ptr1[5] = a5;
  394. ptr1[6] = a6;
  395. ptr1[7] = a7;
  396. /* increment pointer by 8 */
  397. ptr1 += 8U;
  398. } while (--j);
  399. #else
  400. float32_t t1, t2, r1, r2, s1, s2;
  401. /* Initializations for the fft calculation */
  402. n2 = fftLen;
  403. n1 = n2;
  404. for (k = fftLen; k > 1U; k >>= 2U)
  405. {
  406. /* Initializations for the fft calculation */
  407. n1 = n2;
  408. n2 >>= 2U;
  409. ia1 = 0U;
  410. /* FFT Calculation */
  411. j = 0;
  412. do
  413. {
  414. /* index calculation for the coefficients */
  415. ia2 = ia1 + ia1;
  416. ia3 = ia2 + ia1;
  417. co1 = pCoef[ia1 * 2U];
  418. si1 = pCoef[(ia1 * 2U) + 1U];
  419. co2 = pCoef[ia2 * 2U];
  420. si2 = pCoef[(ia2 * 2U) + 1U];
  421. co3 = pCoef[ia3 * 2U];
  422. si3 = pCoef[(ia3 * 2U) + 1U];
  423. /* Twiddle coefficients index modifier */
  424. ia1 = ia1 + twidCoefModifier;
  425. i0 = j;
  426. do
  427. {
  428. /* index calculation for the input as, */
  429. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  430. i1 = i0 + n2;
  431. i2 = i1 + n2;
  432. i3 = i2 + n2;
  433. /* xa + xc */
  434. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  435. /* xa - xc */
  436. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  437. /* ya + yc */
  438. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  439. /* ya - yc */
  440. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  441. /* xb + xd */
  442. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  443. /* xa' = xa + xb + xc + xd */
  444. pSrc[2U * i0] = r1 + t1;
  445. /* xa + xc -(xb + xd) */
  446. r1 = r1 - t1;
  447. /* yb + yd */
  448. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  449. /* ya' = ya + yb + yc + yd */
  450. pSrc[(2U * i0) + 1U] = s1 + t2;
  451. /* (ya + yc) - (yb + yd) */
  452. s1 = s1 - t2;
  453. /* (yb - yd) */
  454. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  455. /* (xb - xd) */
  456. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  457. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  458. pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
  459. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  460. pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
  461. /* (xa - xc) + (yb - yd) */
  462. r1 = r2 + t1;
  463. /* (xa - xc) - (yb - yd) */
  464. r2 = r2 - t1;
  465. /* (ya - yc) - (xb - xd) */
  466. s1 = s2 - t2;
  467. /* (ya - yc) + (xb - xd) */
  468. s2 = s2 + t2;
  469. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  470. pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
  471. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  472. pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
  473. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  474. pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
  475. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  476. pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
  477. i0 += n1;
  478. } while ( i0 < fftLen);
  479. j++;
  480. } while (j <= (n2 - 1U));
  481. twidCoefModifier <<= 2U;
  482. }
  483. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  484. }
  485. /**
  486. brief Core function for the floating-point CIFFT butterfly process.
  487. param[in,out] pSrc points to the in-place buffer of floating-point data type
  488. param[in] fftLen length of the FFT
  489. param[in] pCoef points to twiddle coefficient buffer
  490. param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  491. param[in] onebyfftLen value of 1/fftLen
  492. return none
  493. */
  494. void arm_radix4_butterfly_inverse_f32(
  495. float32_t * pSrc,
  496. uint16_t fftLen,
  497. const float32_t * pCoef,
  498. uint16_t twidCoefModifier,
  499. float32_t onebyfftLen)
  500. {
  501. float32_t co1, co2, co3, si1, si2, si3;
  502. uint32_t ia1, ia2, ia3;
  503. uint32_t i0, i1, i2, i3;
  504. uint32_t n1, n2, j, k;
  505. #if defined (ARM_MATH_LOOPUNROLL)
  506. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  507. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  508. Ybminusd;
  509. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  510. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  511. float32_t *ptr1;
  512. float32_t p0,p1,p2,p3,p4,p5,p6,p7;
  513. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  514. /* Initializations for the first stage */
  515. n2 = fftLen;
  516. n1 = n2;
  517. /* n2 = fftLen/4 */
  518. n2 >>= 2U;
  519. i0 = 0U;
  520. ia1 = 0U;
  521. j = n2;
  522. /* Calculation of first stage */
  523. do
  524. {
  525. /* index calculation for the input as, */
  526. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  527. i1 = i0 + n2;
  528. i2 = i1 + n2;
  529. i3 = i2 + n2;
  530. /* Butterfly implementation */
  531. xaIn = pSrc[(2U * i0)];
  532. yaIn = pSrc[(2U * i0) + 1U];
  533. xcIn = pSrc[(2U * i2)];
  534. ycIn = pSrc[(2U * i2) + 1U];
  535. xbIn = pSrc[(2U * i1)];
  536. ybIn = pSrc[(2U * i1) + 1U];
  537. xdIn = pSrc[(2U * i3)];
  538. ydIn = pSrc[(2U * i3) + 1U];
  539. /* xa + xc */
  540. Xaplusc = xaIn + xcIn;
  541. /* xb + xd */
  542. Xbplusd = xbIn + xdIn;
  543. /* ya + yc */
  544. Yaplusc = yaIn + ycIn;
  545. /* yb + yd */
  546. Ybplusd = ybIn + ydIn;
  547. /* index calculation for the coefficients */
  548. ia2 = ia1 + ia1;
  549. co2 = pCoef[ia2 * 2U];
  550. si2 = pCoef[(ia2 * 2U) + 1U];
  551. /* xa - xc */
  552. Xaminusc = xaIn - xcIn;
  553. /* xb - xd */
  554. Xbminusd = xbIn - xdIn;
  555. /* ya - yc */
  556. Yaminusc = yaIn - ycIn;
  557. /* yb - yd */
  558. Ybminusd = ybIn - ydIn;
  559. /* xa' = xa + xb + xc + xd */
  560. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  561. /* ya' = ya + yb + yc + yd */
  562. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  563. /* (xa - xc) - (yb - yd) */
  564. Xb12C_out = (Xaminusc - Ybminusd);
  565. /* (ya - yc) + (xb - xd) */
  566. Yb12C_out = (Yaminusc + Xbminusd);
  567. /* (xa + xc) - (xb + xd) */
  568. Xc12C_out = (Xaplusc - Xbplusd);
  569. /* (ya + yc) - (yb + yd) */
  570. Yc12C_out = (Yaplusc - Ybplusd);
  571. /* (xa - xc) + (yb - yd) */
  572. Xd12C_out = (Xaminusc + Ybminusd);
  573. /* (ya - yc) - (xb - xd) */
  574. Yd12C_out = (Yaminusc - Xbminusd);
  575. co1 = pCoef[ia1 * 2U];
  576. si1 = pCoef[(ia1 * 2U) + 1U];
  577. /* index calculation for the coefficients */
  578. ia3 = ia2 + ia1;
  579. co3 = pCoef[ia3 * 2U];
  580. si3 = pCoef[(ia3 * 2U) + 1U];
  581. Xb12_out = Xb12C_out * co1;
  582. Yb12_out = Yb12C_out * co1;
  583. Xc12_out = Xc12C_out * co2;
  584. Yc12_out = Yc12C_out * co2;
  585. Xd12_out = Xd12C_out * co3;
  586. Yd12_out = Yd12C_out * co3;
  587. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  588. //Xb12_out -= Yb12C_out * si1;
  589. p0 = Yb12C_out * si1;
  590. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  591. //Yb12_out += Xb12C_out * si1;
  592. p1 = Xb12C_out * si1;
  593. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  594. //Xc12_out -= Yc12C_out * si2;
  595. p2 = Yc12C_out * si2;
  596. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  597. //Yc12_out += Xc12C_out * si2;
  598. p3 = Xc12C_out * si2;
  599. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  600. //Xd12_out -= Yd12C_out * si3;
  601. p4 = Yd12C_out * si3;
  602. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  603. //Yd12_out += Xd12C_out * si3;
  604. p5 = Xd12C_out * si3;
  605. Xb12_out -= p0;
  606. Yb12_out += p1;
  607. Xc12_out -= p2;
  608. Yc12_out += p3;
  609. Xd12_out -= p4;
  610. Yd12_out += p5;
  611. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  612. pSrc[2U * i1] = Xc12_out;
  613. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  614. pSrc[(2U * i1) + 1U] = Yc12_out;
  615. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  616. pSrc[2U * i2] = Xb12_out;
  617. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  618. pSrc[(2U * i2) + 1U] = Yb12_out;
  619. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  620. pSrc[2U * i3] = Xd12_out;
  621. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  622. pSrc[(2U * i3) + 1U] = Yd12_out;
  623. /* Twiddle coefficients index modifier */
  624. ia1 = ia1 + twidCoefModifier;
  625. /* Updating input index */
  626. i0 = i0 + 1U;
  627. } while (--j);
  628. twidCoefModifier <<= 2U;
  629. /* Calculation of second stage to excluding last stage */
  630. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  631. {
  632. /* Initializations for the first stage */
  633. n1 = n2;
  634. n2 >>= 2U;
  635. ia1 = 0U;
  636. /* Calculation of first stage */
  637. j = 0;
  638. do
  639. {
  640. /* index calculation for the coefficients */
  641. ia2 = ia1 + ia1;
  642. ia3 = ia2 + ia1;
  643. co1 = pCoef[ia1 * 2U];
  644. si1 = pCoef[(ia1 * 2U) + 1U];
  645. co2 = pCoef[ia2 * 2U];
  646. si2 = pCoef[(ia2 * 2U) + 1U];
  647. co3 = pCoef[ia3 * 2U];
  648. si3 = pCoef[(ia3 * 2U) + 1U];
  649. /* Twiddle coefficients index modifier */
  650. ia1 = ia1 + twidCoefModifier;
  651. i0 = j;
  652. do
  653. {
  654. /* index calculation for the input as, */
  655. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  656. i1 = i0 + n2;
  657. i2 = i1 + n2;
  658. i3 = i2 + n2;
  659. xaIn = pSrc[(2U * i0)];
  660. yaIn = pSrc[(2U * i0) + 1U];
  661. xbIn = pSrc[(2U * i1)];
  662. ybIn = pSrc[(2U * i1) + 1U];
  663. xcIn = pSrc[(2U * i2)];
  664. ycIn = pSrc[(2U * i2) + 1U];
  665. xdIn = pSrc[(2U * i3)];
  666. ydIn = pSrc[(2U * i3) + 1U];
  667. /* xa - xc */
  668. Xaminusc = xaIn - xcIn;
  669. /* (xb - xd) */
  670. Xbminusd = xbIn - xdIn;
  671. /* ya - yc */
  672. Yaminusc = yaIn - ycIn;
  673. /* (yb - yd) */
  674. Ybminusd = ybIn - ydIn;
  675. /* xa + xc */
  676. Xaplusc = xaIn + xcIn;
  677. /* xb + xd */
  678. Xbplusd = xbIn + xdIn;
  679. /* ya + yc */
  680. Yaplusc = yaIn + ycIn;
  681. /* yb + yd */
  682. Ybplusd = ybIn + ydIn;
  683. /* (xa - xc) - (yb - yd) */
  684. Xb12C_out = (Xaminusc - Ybminusd);
  685. /* (ya - yc) + (xb - xd) */
  686. Yb12C_out = (Yaminusc + Xbminusd);
  687. /* xa + xc -(xb + xd) */
  688. Xc12C_out = (Xaplusc - Xbplusd);
  689. /* (ya + yc) - (yb + yd) */
  690. Yc12C_out = (Yaplusc - Ybplusd);
  691. /* (xa - xc) + (yb - yd) */
  692. Xd12C_out = (Xaminusc + Ybminusd);
  693. /* (ya - yc) - (xb - xd) */
  694. Yd12C_out = (Yaminusc - Xbminusd);
  695. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  696. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  697. Xb12_out = Xb12C_out * co1;
  698. Yb12_out = Yb12C_out * co1;
  699. Xc12_out = Xc12C_out * co2;
  700. Yc12_out = Yc12C_out * co2;
  701. Xd12_out = Xd12C_out * co3;
  702. Yd12_out = Yd12C_out * co3;
  703. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  704. //Xb12_out -= Yb12C_out * si1;
  705. p0 = Yb12C_out * si1;
  706. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  707. //Yb12_out += Xb12C_out * si1;
  708. p1 = Xb12C_out * si1;
  709. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  710. //Xc12_out -= Yc12C_out * si2;
  711. p2 = Yc12C_out * si2;
  712. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  713. //Yc12_out += Xc12C_out * si2;
  714. p3 = Xc12C_out * si2;
  715. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  716. //Xd12_out -= Yd12C_out * si3;
  717. p4 = Yd12C_out * si3;
  718. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  719. //Yd12_out += Xd12C_out * si3;
  720. p5 = Xd12C_out * si3;
  721. Xb12_out -= p0;
  722. Yb12_out += p1;
  723. Xc12_out -= p2;
  724. Yc12_out += p3;
  725. Xd12_out -= p4;
  726. Yd12_out += p5;
  727. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  728. pSrc[2U * i1] = Xc12_out;
  729. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  730. pSrc[(2U * i1) + 1U] = Yc12_out;
  731. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  732. pSrc[2U * i2] = Xb12_out;
  733. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  734. pSrc[(2U * i2) + 1U] = Yb12_out;
  735. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  736. pSrc[2U * i3] = Xd12_out;
  737. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  738. pSrc[(2U * i3) + 1U] = Yd12_out;
  739. i0 += n1;
  740. } while (i0 < fftLen);
  741. j++;
  742. } while (j <= (n2 - 1U));
  743. twidCoefModifier <<= 2U;
  744. }
  745. /* Initializations of last stage */
  746. j = fftLen >> 2;
  747. ptr1 = &pSrc[0];
  748. /* Calculations of last stage */
  749. do
  750. {
  751. xaIn = ptr1[0];
  752. yaIn = ptr1[1];
  753. xbIn = ptr1[2];
  754. ybIn = ptr1[3];
  755. xcIn = ptr1[4];
  756. ycIn = ptr1[5];
  757. xdIn = ptr1[6];
  758. ydIn = ptr1[7];
  759. /* Butterfly implementation */
  760. /* xa + xc */
  761. Xaplusc = xaIn + xcIn;
  762. /* xa - xc */
  763. Xaminusc = xaIn - xcIn;
  764. /* ya + yc */
  765. Yaplusc = yaIn + ycIn;
  766. /* ya - yc */
  767. Yaminusc = yaIn - ycIn;
  768. /* xb + xd */
  769. Xbplusd = xbIn + xdIn;
  770. /* yb + yd */
  771. Ybplusd = ybIn + ydIn;
  772. /* (xb-xd) */
  773. Xbminusd = xbIn - xdIn;
  774. /* (yb-yd) */
  775. Ybminusd = ybIn - ydIn;
  776. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  777. a0 = (Xaplusc + Xbplusd);
  778. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  779. a1 = (Yaplusc + Ybplusd);
  780. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  781. a2 = (Xaplusc - Xbplusd);
  782. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  783. a3 = (Yaplusc - Ybplusd);
  784. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  785. a4 = (Xaminusc - Ybminusd);
  786. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  787. a5 = (Yaminusc + Xbminusd);
  788. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  789. a6 = (Xaminusc + Ybminusd);
  790. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  791. a7 = (Yaminusc - Xbminusd);
  792. p0 = a0 * onebyfftLen;
  793. p1 = a1 * onebyfftLen;
  794. p2 = a2 * onebyfftLen;
  795. p3 = a3 * onebyfftLen;
  796. p4 = a4 * onebyfftLen;
  797. p5 = a5 * onebyfftLen;
  798. p6 = a6 * onebyfftLen;
  799. p7 = a7 * onebyfftLen;
  800. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  801. ptr1[0] = p0;
  802. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  803. ptr1[1] = p1;
  804. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  805. ptr1[2] = p2;
  806. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  807. ptr1[3] = p3;
  808. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  809. ptr1[4] = p4;
  810. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  811. ptr1[5] = p5;
  812. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  813. ptr1[6] = p6;
  814. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  815. ptr1[7] = p7;
  816. /* increment source pointer by 8 for next calculations */
  817. ptr1 = ptr1 + 8U;
  818. } while (--j);
  819. #else
  820. float32_t t1, t2, r1, r2, s1, s2;
  821. /* Initializations for the first stage */
  822. n2 = fftLen;
  823. n1 = n2;
  824. /* Calculation of first stage */
  825. for (k = fftLen; k > 4U; k >>= 2U)
  826. {
  827. /* Initializations for the first stage */
  828. n1 = n2;
  829. n2 >>= 2U;
  830. ia1 = 0U;
  831. /* Calculation of first stage */
  832. j = 0;
  833. do
  834. {
  835. /* index calculation for the coefficients */
  836. ia2 = ia1 + ia1;
  837. ia3 = ia2 + ia1;
  838. co1 = pCoef[ia1 * 2U];
  839. si1 = pCoef[(ia1 * 2U) + 1U];
  840. co2 = pCoef[ia2 * 2U];
  841. si2 = pCoef[(ia2 * 2U) + 1U];
  842. co3 = pCoef[ia3 * 2U];
  843. si3 = pCoef[(ia3 * 2U) + 1U];
  844. /* Twiddle coefficients index modifier */
  845. ia1 = ia1 + twidCoefModifier;
  846. i0 = j;
  847. do
  848. {
  849. /* index calculation for the input as, */
  850. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  851. i1 = i0 + n2;
  852. i2 = i1 + n2;
  853. i3 = i2 + n2;
  854. /* xa + xc */
  855. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  856. /* xa - xc */
  857. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  858. /* ya + yc */
  859. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  860. /* ya - yc */
  861. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  862. /* xb + xd */
  863. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  864. /* xa' = xa + xb + xc + xd */
  865. pSrc[2U * i0] = r1 + t1;
  866. /* xa + xc -(xb + xd) */
  867. r1 = r1 - t1;
  868. /* yb + yd */
  869. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  870. /* ya' = ya + yb + yc + yd */
  871. pSrc[(2U * i0) + 1U] = s1 + t2;
  872. /* (ya + yc) - (yb + yd) */
  873. s1 = s1 - t2;
  874. /* (yb - yd) */
  875. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  876. /* (xb - xd) */
  877. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  878. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  879. pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
  880. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  881. pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
  882. /* (xa - xc) - (yb - yd) */
  883. r1 = r2 - t1;
  884. /* (xa - xc) + (yb - yd) */
  885. r2 = r2 + t1;
  886. /* (ya - yc) + (xb - xd) */
  887. s1 = s2 + t2;
  888. /* (ya - yc) - (xb - xd) */
  889. s2 = s2 - t2;
  890. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  891. pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
  892. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  893. pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
  894. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  895. pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
  896. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  897. pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
  898. i0 += n1;
  899. } while ( i0 < fftLen);
  900. j++;
  901. } while (j <= (n2 - 1U));
  902. twidCoefModifier <<= 2U;
  903. }
  904. /* Initializations of last stage */
  905. n1 = n2;
  906. n2 >>= 2U;
  907. /* Calculations of last stage */
  908. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  909. {
  910. /* index calculation for the input as, */
  911. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  912. i1 = i0 + n2;
  913. i2 = i1 + n2;
  914. i3 = i2 + n2;
  915. /* Butterfly implementation */
  916. /* xa + xc */
  917. r1 = pSrc[2U * i0] + pSrc[2U * i2];
  918. /* xa - xc */
  919. r2 = pSrc[2U * i0] - pSrc[2U * i2];
  920. /* ya + yc */
  921. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  922. /* ya - yc */
  923. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  924. /* xc + xd */
  925. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  926. /* xa' = xa + xb + xc + xd */
  927. pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
  928. /* (xa + xb) - (xc + xd) */
  929. r1 = r1 - t1;
  930. /* yb + yd */
  931. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  932. /* ya' = ya + yb + yc + yd */
  933. pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
  934. /* (ya + yc) - (yb + yd) */
  935. s1 = s1 - t2;
  936. /* (yb-yd) */
  937. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  938. /* (xb-xd) */
  939. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  940. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  941. pSrc[2U * i1] = r1 * onebyfftLen;
  942. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  943. pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
  944. /* (xa - xc) - (yb-yd) */
  945. r1 = r2 - t1;
  946. /* (xa - xc) + (yb-yd) */
  947. r2 = r2 + t1;
  948. /* (ya - yc) + (xb-xd) */
  949. s1 = s2 + t2;
  950. /* (ya - yc) - (xb-xd) */
  951. s2 = s2 - t2;
  952. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  953. pSrc[2U * i2] = r1 * onebyfftLen;
  954. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  955. pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
  956. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  957. pSrc[2U * i3] = r2 * onebyfftLen;
  958. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  959. pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
  960. }
  961. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  962. }